1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <string.h>  // For memcpy and memset.
14 
15 #include "libyuv/basic_types.h"
16 
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21 
22 // llvm x86 is poor at ternary operator, so use branchless min/max.
23 
24 #define USE_BRANCHLESS 1
25 #if USE_BRANCHLESS
// Clamps v from below at 0: returns 0 when v is negative, otherwise v.
// Branchless: ~v has its sign bit set exactly when v >= 0, so the arithmetic
// right shift produces an all-ones mask for v >= 0 and a zero mask for v < 0.
// Using ~v instead of -v means v is never negated, so INT32_MIN cannot
// overflow. Relies on >> of a negative int being an arithmetic shift, as
// clamp255 below also does.
static __inline int32 clamp0(int32 v) {
  return ((~(v)) >> 31) & (v);
}
29 
// Caps v at 255 without branching. The result is always masked to the low
// 8 bits, so callers are expected to have removed negative values first
// (a negative input comes back as its low byte).
static __inline int32 clamp255(int32 v) {
  // All ones when v > 255 (255 - v is negative), otherwise zero.
  const int32 over_mask = (255 - v) >> 31;
  return (over_mask | v) & 255;
}
33 
// Clamps val to the range [0, 255]: negatives become 0 first, then anything
// above 255 becomes 255.
static __inline uint32 Clamp(int32 val) {
  return (uint32)clamp255(clamp0(val));
}
38 
// Returns the absolute value of v without branching.
// m is all ones when v is negative and zero otherwise; (v + m) ^ m then
// evaluates to -v or v respectively. The arithmetic is done in uint32 so that
// INT32_MIN is handled without signed overflow (it yields 0x80000000).
static __inline uint32 Abs(int32 v) {
  const uint32 m = (uint32)(v >> 31);
  return ((uint32)v + m) ^ m;
}
43 #else   // USE_BRANCHLESS
// Clamps v from below at 0: negative values become 0, everything else is
// returned unchanged.
static __inline int32 clamp0(int32 v) {
  if (v < 0) {
    return 0;
  }
  return v;
}
47 
// Caps v at 255. Values of 255 or less, including negative values, are
// returned unchanged.
static __inline int32 clamp255(int32 v) {
  if (v > 255) {
    return 255;
  }
  return v;
}
51 
// Clamps val to the range [0, 255]: negatives become 0 first, then anything
// above 255 becomes 255.
static __inline uint32 Clamp(int32 val) {
  return (uint32)clamp255(clamp0(val));
}
56 
// Returns the absolute value of v.
// The negation is performed in uint32 so that INT32_MIN does not overflow
// (it yields 0x80000000).
static __inline uint32 Abs(int32 v) {
  return (v < 0) ? (0u - (uint32)v) : (uint32)v;
}
60 #endif  // USE_BRANCHLESS
61 
// WRITEWORD(p, v) stores the 32-bit value v at p, least significant byte
// first.
#ifdef LIBYUV_LITTLE_ENDIAN
// Single 32-bit store through a cast pointer. The argument and the whole
// expansion are parenthesized so that any expression can be passed as v and
// the macro can be used inside a larger expression.
// NOTE(review): this form assumes p may be accessed as a uint32 (alignment
// and aliasing) on the target - confirm for new platforms.
#define WRITEWORD(p, v) (*(uint32*)(p) = (v))
#else
// Portable fallback: writes the four bytes individually, low byte first.
static __inline void WRITEWORD(uint8* p, uint32 v) {
  p[0] = (uint8)(v & 255);
  p[1] = (uint8)((v >> 8) & 255);
  p[2] = (uint8)((v >> 16) & 255);
  p[3] = (uint8)((v >> 24) & 255);
}
#endif
72 
// Expands a row of 3-byte RGB24 pixels (bytes B, G, R) into 4-byte ARGB
// pixels (bytes B, G, R, A). The colour bytes are copied in order and alpha
// is set to 255.
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_rgb24[0];
    const uint8 green = src_rgb24[1];
    const uint8 red = src_rgb24[2];
    src_rgb24 += 3;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
87 
// Expands a row of 3-byte RAW pixels (bytes R, G, B) into 4-byte ARGB pixels
// (bytes B, G, R, A): the first and third bytes are swapped and alpha is set
// to 255.
void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 red = src_raw[0];
    const uint8 green = src_raw[1];
    const uint8 blue = src_raw[2];
    src_raw += 3;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
102 
// Converts a row of 3-byte RAW pixels (bytes R, G, B) to 3-byte RGB24 pixels
// (bytes B, G, R) by swapping the first and third byte of every pixel.
void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 red = src_raw[0];
    const uint8 green = src_raw[1];
    const uint8 blue = src_raw[2];
    src_raw += 3;
    dst_rgb24[0] = blue;
    dst_rgb24[1] = green;
    dst_rgb24[2] = red;
    dst_rgb24 += 3;
  }
}
116 
// Expands a row of 16-bit RGB565 pixels (stored low byte first) to ARGB.
// Each 5- or 6-bit channel is widened to 8 bits by replicating its top bits
// into the low bits; alpha is set to 255.
void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Assemble the 16-bit pixel: bits 0-4 blue, 5-10 green, 11-15 red.
    const unsigned pixel = src_rgb565[0] | ((unsigned)src_rgb565[1] << 8);
    const unsigned blue5 = pixel & 0x1f;
    const unsigned green6 = (pixel >> 5) & 0x3f;
    const unsigned red5 = pixel >> 11;
    src_rgb565 += 2;
    dst_argb[0] = (uint8)((blue5 << 3) | (blue5 >> 2));
    dst_argb[1] = (uint8)((green6 << 2) | (green6 >> 4));
    dst_argb[2] = (uint8)((red5 << 3) | (red5 >> 2));
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
131 
// Expands a row of 16-bit ARGB1555 pixels (stored low byte first) to ARGB.
// The 5-bit colour channels are widened to 8 bits by replicating their top
// bits; the single alpha bit becomes 0 or 255.
void ARGB1555ToARGBRow_C(const uint8* src_argb1555,
                         uint8* dst_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Bits 0-4 blue, 5-9 green, 10-14 red, 15 alpha.
    const unsigned pixel = src_argb1555[0] | ((unsigned)src_argb1555[1] << 8);
    const unsigned blue5 = pixel & 0x1f;
    const unsigned green5 = (pixel >> 5) & 0x1f;
    const unsigned red5 = (pixel >> 10) & 0x1f;
    const unsigned alpha1 = pixel >> 15;
    src_argb1555 += 2;
    dst_argb[0] = (uint8)((blue5 << 3) | (blue5 >> 2));
    dst_argb[1] = (uint8)((green5 << 3) | (green5 >> 2));
    dst_argb[2] = (uint8)((red5 << 3) | (red5 >> 2));
    dst_argb[3] = alpha1 ? 255u : 0u;
    dst_argb += 4;
  }
}
149 
// Expands a row of 16-bit ARGB4444 pixels to ARGB. The first byte holds blue
// (low nibble) and green (high nibble), the second holds red and alpha. Each
// nibble n is widened to the byte 0xnn, i.e. n * 17.
void ARGB4444ToARGBRow_C(const uint8* src_argb4444,
                         uint8* dst_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const unsigned blue4 = src_argb4444[0] & 0x0f;
    const unsigned green4 = src_argb4444[0] >> 4;
    const unsigned red4 = src_argb4444[1] & 0x0f;
    const unsigned alpha4 = src_argb4444[1] >> 4;
    src_argb4444 += 2;
    dst_argb[0] = (uint8)(blue4 * 0x11);
    dst_argb[1] = (uint8)(green4 * 0x11);
    dst_argb[2] = (uint8)(red4 * 0x11);
    dst_argb[3] = (uint8)(alpha4 * 0x11);
    dst_argb += 4;
  }
}
167 
// Packs a row of 4-byte ARGB pixels (bytes B, G, R, A) into 3-byte RGB24
// pixels (bytes B, G, R) by dropping the alpha byte.
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = blue;
    dst_rgb[1] = green;
    dst_rgb[2] = red;
    dst_rgb += 3;
  }
}
181 
// Packs a row of 4-byte ARGB pixels (bytes B, G, R, A) into 3-byte RAW pixels
// (bytes R, G, B): alpha is dropped and the colour bytes are reversed.
void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = red;
    dst_rgb[1] = green;
    dst_rgb[2] = blue;
    dst_rgb += 3;
  }
}
195 
// Packs a row of ARGB pixels into 16-bit RGB565, dropping alpha and the low
// bits of each colour channel (5 bits blue, 6 bits green, 5 bits red).
//
// Each pixel is written as two bytes, low byte first, which is the layout the
// RGB565 readers in this file decode. Compared with the earlier paired
// version this:
//  - builds the pixel in uint32, so no value is shifted into the sign bit of
//    an int (undefined behaviour in C);
//  - never stores through a uint16* cast, so the odd trailing pixel no longer
//    depends on host byte order or on dst_rgb being 2-byte aligned.
// Output bytes are unchanged on little-endian hosts.
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32 b = src_argb[0] >> 3;
    const uint32 g = src_argb[1] >> 2;
    const uint32 r = src_argb[2] >> 3;
    const uint32 pixel = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8)(pixel & 0xff);
    dst_rgb[1] = (uint8)(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
217 
// Converts a row of ARGB to RGB565 with ordered dithering.
// dither4 is a row of 4 values from a 4x4 dither matrix. The matrix contains
// values that are added to B, G and R (saturating at 255) before the low bits
// are dropped for 565.
// The first byte of the 4x4 matrix is the upper left entry.
// The 4 values are passed as an int and then referenced as an array of bytes,
// so endianness does not affect the order of the original matrix: pixel x
// uses byte (x & 3) in memory. Numerically, dither4 therefore contains the
// first pixel's value in the lower byte on little endian or in the upper byte
// on big endian.
//
// Each output pixel is written as two bytes, low byte first. The pixel is
// built in uint32 so nothing is shifted into the sign bit of an int, and no
// uint16* store is used, so the odd trailing pixel does not depend on host
// byte order or on the alignment of dst_rgb.
void ARGBToRGB565DitherRow_C(const uint8* src_argb,
                             uint8* dst_rgb,
                             const uint32 dither4,
                             int width) {
  const unsigned char* dither = (const unsigned char*)(&dither4);
  int x;
  for (x = 0; x < width; ++x) {
    const int d = dither[x & 3];
    const uint32 b = (uint32)clamp255(src_argb[0] + d) >> 3;
    const uint32 g = (uint32)clamp255(src_argb[1] + d) >> 2;
    const uint32 r = (uint32)clamp255(src_argb[2] + d) >> 3;
    const uint32 pixel = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8)(pixel & 0xff);
    dst_rgb[1] = (uint8)(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
253 
// Packs a row of ARGB pixels into 16-bit ARGB1555: the top 5 bits of blue,
// green and red and the top bit of alpha are kept.
//
// Each pixel is written as two bytes, low byte first, matching how the
// ARGB1555 readers in this file decode the format. The pixel is built in
// uint32 so the alpha bit is never shifted into the sign bit of an int
// (undefined behaviour in C), and no uint32*/uint16* stores are used, so the
// result does not depend on host byte order or on the alignment of dst_rgb.
// Output bytes are unchanged on little-endian hosts.
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32 b = src_argb[0] >> 3;
    const uint32 g = src_argb[1] >> 3;
    const uint32 r = src_argb[2] >> 3;
    const uint32 a = src_argb[3] >> 7;
    const uint32 pixel = b | (g << 5) | (r << 10) | (a << 15);
    dst_rgb[0] = (uint8)(pixel & 0xff);
    dst_rgb[1] = (uint8)(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
278 
// Packs a row of ARGB pixels into 16-bit ARGB4444, keeping the top 4 bits of
// every channel.
//
// Each pixel is written as two bytes: blue (low nibble) and green (high
// nibble) first, then red and alpha, matching how the ARGB4444 readers in
// this file decode the format. The pixel is built in uint32 so alpha is never
// shifted into the sign bit of an int (undefined behaviour in C), and no
// uint32*/uint16* stores are used, so the result does not depend on host byte
// order or on the alignment of dst_rgb. Output bytes are unchanged on
// little-endian hosts.
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32 b = src_argb[0] >> 4;
    const uint32 g = src_argb[1] >> 4;
    const uint32 r = src_argb[2] >> 4;
    const uint32 a = src_argb[3] >> 4;
    const uint32 pixel = b | (g << 4) | (r << 8) | (a << 12);
    dst_rgb[0] = (uint8)(pixel & 0xff);
    dst_rgb[1] = (uint8)(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
303 
// Computes luma from 8-bit R, G, B using fixed-point weights 66/129/25 over
// 256. The constant 0x1080 adds the +16 offset (0x1000) and rounding (0x80).
static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return (weighted + 0x1080) >> 8;
}
307 
// Computes the U chroma sample from 8-bit R, G, B using fixed-point weights
// -38/-74/+112 over 256. The constant 0x8080 adds the +128 centre (0x8000)
// and rounding (0x80).
static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * b - 74 * g - 38 * r;
  return (weighted + 0x8080) >> 8;
}
// Computes the V chroma sample from 8-bit R, G, B using fixed-point weights
// +112/-94/-18 over 256. The constant 0x8080 adds the +128 centre (0x8000)
// and rounding (0x80).
static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return (weighted + 0x8080) >> 8;
}
314 
// MAKEROWY generates NAME##ToYRow_C and NAME##ToUVRow_C for one packed RGB
// layout. R, G and B are the byte offsets of the channels within a pixel and
// BPP is the number of bytes per pixel.
//  - NAME##ToYRow_C writes one Y value per pixel using RGBToY.
//  - NAME##ToUVRow_C reads two rows (src_rgb0 and the row src_stride_rgb
//    bytes after it), averages every 2x2 block per channel (truncating) and
//    writes one U and one V per block. An odd trailing column is averaged
//    over its two rows only.
#define MAKEROWY(NAME, R, G, B, BPP)                                        \
  void NAME##ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {    \
    int n;                                                                  \
    for (n = width; n > 0; --n) {                                           \
      *dst_y++ = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);          \
      src_argb0 += BPP;                                                     \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,           \
                       uint8* dst_u, uint8* dst_v, int width) {             \
    const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                      \
    int pairs;                                                              \
    for (pairs = width / 2; pairs > 0; --pairs) {                           \
      const uint8 avg_b = (src_rgb0[B] + src_rgb0[B + BPP] +                \
                           src_rgb1[B] + src_rgb1[B + BPP]) >> 2;           \
      const uint8 avg_g = (src_rgb0[G] + src_rgb0[G + BPP] +                \
                           src_rgb1[G] + src_rgb1[G + BPP]) >> 2;           \
      const uint8 avg_r = (src_rgb0[R] + src_rgb0[R + BPP] +                \
                           src_rgb1[R] + src_rgb1[R + BPP]) >> 2;           \
      *dst_u++ = RGBToU(avg_r, avg_g, avg_b);                               \
      *dst_v++ = RGBToV(avg_r, avg_g, avg_b);                               \
      src_rgb0 += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                  \
    }                                                                       \
    if (width & 1) {                                                        \
      const uint8 avg_b = (src_rgb0[B] + src_rgb1[B]) >> 1;                 \
      const uint8 avg_g = (src_rgb0[G] + src_rgb1[G]) >> 1;                 \
      const uint8 avg_r = (src_rgb0[R] + src_rgb1[R]) >> 1;                 \
      dst_u[0] = RGBToU(avg_r, avg_g, avg_b);                               \
      dst_v[0] = RGBToV(avg_r, avg_g, avg_b);                               \
    }                                                                       \
  }

MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
362 
363 // JPeg uses a variation on BT.601-1 full range
364 // y =  0.29900 * r + 0.58700 * g + 0.11400 * b
365 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
366 // v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
367 // BT.601 Mpeg range uses:
368 // b 0.1016 * 255 = 25.908 = 25
369 // g 0.5078 * 255 = 129.489 = 129
370 // r 0.2578 * 255 = 65.739 = 66
371 // JPeg 8 bit Y (not used):
372 // b 0.11400 * 256 = 29.184 = 29
373 // g 0.58700 * 256 = 150.272 = 150
374 // r 0.29900 * 256 = 76.544 = 77
375 // JPeg 7 bit Y:
376 // b 0.11400 * 128 = 14.592 = 15
377 // g 0.58700 * 128 = 75.136 = 75
378 // r 0.29900 * 128 = 38.272 = 38
379 // JPeg 8 bit U:
380 // b  0.50000 * 255 = 127.5 = 127
381 // g -0.33126 * 255 = -84.4713 = -84
382 // r -0.16874 * 255 = -43.0287 = -43
383 // JPeg 8 bit V:
384 // b -0.08131 * 255 = -20.73405 = -20
385 // g -0.41869 * 255 = -106.76595 = -107
386 // r  0.50000 * 255 = 127.5 = 127
387 
// Computes full-range (JPEG) luma from 8-bit R, G, B using 7-bit fixed-point
// weights 38/75/15 over 128; 64 is the rounding term.
static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 38 * r + 75 * g + 15 * b;
  return (weighted + 64) >> 7;
}
391 
// Computes the full-range (JPEG) U chroma sample from 8-bit R, G, B using
// fixed-point weights -43/-84/+127 over 256. The constant 0x8080 adds the
// +128 centre (0x8000) and rounding (0x80).
static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
// Computes the full-range (JPEG) V chroma sample from 8-bit R, G, B using
// fixed-point weights +127/-107/-20 over 256. The constant 0x8080 adds the
// +128 centre (0x8000) and rounding (0x80).
static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}
398 
// Rounding average of two values: (a + b + 1) / 2.
#define AVGB(a, b) (((a) + (b) + 1) >> 1)

// MAKEROWYJ generates NAME##ToYJRow_C and NAME##ToUVJRow_C for one packed RGB
// layout. R, G and B are the byte offsets of the channels within a pixel and
// BPP is the number of bytes per pixel.
//  - NAME##ToYJRow_C writes one Y value per pixel using RGBToYJ.
//  - NAME##ToUVJRow_C reads two rows (src_rgb0 and the row src_stride_rgb
//    bytes after it). For every 2x2 block each channel is averaged with
//    rounding, first vertically and then horizontally, and one U and one V
//    are written. An odd trailing column is averaged vertically only.
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
  void NAME##ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {   \
    int n;                                                                  \
    for (n = width; n > 0; --n) {                                           \
      *dst_y++ = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);         \
      src_argb0 += BPP;                                                     \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,          \
                        uint8* dst_u, uint8* dst_v, int width) {            \
    const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                      \
    int pairs;                                                              \
    for (pairs = width / 2; pairs > 0; --pairs) {                           \
      const uint8 avg_b = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),              \
                               AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
      const uint8 avg_g = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),              \
                               AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
      const uint8 avg_r = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),              \
                               AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
      *dst_u++ = RGBToUJ(avg_r, avg_g, avg_b);                              \
      *dst_v++ = RGBToVJ(avg_r, avg_g, avg_b);                              \
      src_rgb0 += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                  \
    }                                                                       \
    if (width & 1) {                                                        \
      const uint8 avg_b = AVGB(src_rgb0[B], src_rgb1[B]);                   \
      const uint8 avg_g = AVGB(src_rgb0[G], src_rgb1[G]);                   \
      const uint8 avg_r = AVGB(src_rgb0[R], src_rgb1[R]);                   \
      dst_u[0] = RGBToUJ(avg_r, avg_g, avg_b);                              \
      dst_v[0] = RGBToVJ(avg_r, avg_g, avg_b);                              \
    }                                                                       \
  }

MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ
440 
// Computes one Y value per pixel for a row of 16-bit RGB565 pixels (stored
// low byte first). Each channel is first widened to 8 bits by replicating its
// top bits, then passed to RGBToY.
void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Bits 0-4 blue, 5-10 green, 11-15 red.
    const unsigned pixel = src_rgb565[0] | ((unsigned)src_rgb565[1] << 8);
    const unsigned blue5 = pixel & 0x1f;
    const unsigned green6 = (pixel >> 5) & 0x3f;
    const unsigned red5 = pixel >> 11;
    const uint8 b = (uint8)((blue5 << 3) | (blue5 >> 2));
    const uint8 g = (uint8)((green6 << 2) | (green6 >> 4));
    const uint8 r = (uint8)((red5 << 3) | (red5 >> 2));
    *dst_y++ = RGBToY(r, g, b);
    src_rgb565 += 2;
  }
}
455 
// Computes one Y value per pixel for a row of 16-bit ARGB1555 pixels (stored
// low byte first). The 5-bit colour channels are widened to 8 bits by
// replicating their top bits, then passed to RGBToY. Alpha is ignored.
void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Bits 0-4 blue, 5-9 green, 10-14 red, 15 alpha.
    const unsigned pixel = src_argb1555[0] | ((unsigned)src_argb1555[1] << 8);
    const unsigned blue5 = pixel & 0x1f;
    const unsigned green5 = (pixel >> 5) & 0x1f;
    const unsigned red5 = (pixel >> 10) & 0x1f;
    const uint8 b = (uint8)((blue5 << 3) | (blue5 >> 2));
    const uint8 g = (uint8)((green5 << 3) | (green5 >> 2));
    const uint8 r = (uint8)((red5 << 3) | (red5 >> 2));
    *dst_y++ = RGBToY(r, g, b);
    src_argb1555 += 2;
  }
}
470 
// Computes one Y value per pixel for a row of 16-bit ARGB4444 pixels. The
// first byte holds blue (low nibble) and green (high nibble); the low nibble
// of the second byte holds red. Each nibble n is widened to 0xnn (n * 17)
// before being passed to RGBToY. Alpha is ignored.
void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 b = (uint8)((src_argb4444[0] & 0x0f) * 0x11);
    const uint8 g = (uint8)((src_argb4444[0] >> 4) * 0x11);
    const uint8 r = (uint8)((src_argb4444[1] & 0x0f) * 0x11);
    *dst_y++ = RGBToY(r, g, b);
    src_argb4444 += 2;
  }
}
485 
// Computes one U and one V sample for every 2x2 block of RGB565 pixels taken
// from two rows: src_rgb565 and the row src_stride_rgb565 bytes after it.
// Channel values of the block are summed rather than averaged, and the sums
// are then widened to 8 bits by bit replication before RGBToU / RGBToV.
// An odd trailing column uses the two vertically adjacent pixels only.
void RGB565ToUVRow_C(const uint8* src_rgb565,
                     int src_stride_rgb565,
                     uint8* dst_u,
                     uint8* dst_v,
                     int width) {
  const uint8* row0 = src_rgb565;
  const uint8* row1 = src_rgb565 + src_stride_rgb565;
  int pairs;
  for (pairs = width / 2; pairs > 0; --pairs) {
    // The four pixels of the block as 16-bit values (low byte first):
    // bits 0-4 blue, 5-10 green, 11-15 red.
    const unsigned p0 = row0[0] | ((unsigned)row0[1] << 8);
    const unsigned p1 = row0[2] | ((unsigned)row0[3] << 8);
    const unsigned p2 = row1[0] | ((unsigned)row1[1] << 8);
    const unsigned p3 = row1[2] | ((unsigned)row1[3] << 8);
    // 565 * 4 = 787: sums of four values gain two bits.
    uint8 b = (uint8)((p0 & 0x1f) + (p1 & 0x1f) + (p2 & 0x1f) + (p3 & 0x1f));
    const uint8 g = (uint8)(((p0 >> 5) & 0x3f) + ((p1 >> 5) & 0x3f) +
                            ((p2 >> 5) & 0x3f) + ((p3 >> 5) & 0x3f));
    uint8 r = (uint8)((p0 >> 11) + (p1 >> 11) + (p2 >> 11) + (p3 >> 11));
    // 787 -> 888: green is already 8 bits wide.
    b = (uint8)((b << 1) | (b >> 6));
    r = (uint8)((r << 1) | (r >> 6));
    *dst_u++ = RGBToU(r, g, b);
    *dst_v++ = RGBToV(r, g, b);
    row0 += 4;
    row1 += 4;
  }
  if (width & 1) {
    const unsigned p0 = row0[0] | ((unsigned)row0[1] << 8);
    const unsigned p2 = row1[0] | ((unsigned)row1[1] << 8);
    // 565 * 2 = 676: sums of two values gain one bit.
    uint8 b = (uint8)((p0 & 0x1f) + (p2 & 0x1f));
    uint8 g = (uint8)(((p0 >> 5) & 0x3f) + ((p2 >> 5) & 0x3f));
    uint8 r = (uint8)((p0 >> 11) + (p2 >> 11));
    // 676 -> 888.
    b = (uint8)((b << 2) | (b >> 4));
    g = (uint8)((g << 1) | (g >> 6));
    r = (uint8)((r << 2) | (r >> 4));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
535 
// Computes one U and one V sample for every 2x2 block of ARGB1555 pixels
// taken from two rows: src_argb1555 and the row src_stride_argb1555 bytes
// after it. Channel values of the block are summed rather than averaged, and
// the sums are then widened to 8 bits by bit replication before RGBToU /
// RGBToV. An odd trailing column uses the two vertically adjacent pixels
// only. Alpha is ignored.
//
// ARGB1555 layout (two bytes, low byte first): bits 0-4 blue, 5-9 green,
// 10-14 red, 15 alpha. Red is therefore (byte1 & 0x7c) >> 2.
void ARGB1555ToUVRow_C(const uint8* src_argb1555,
                       int src_stride_argb1555,
                       uint8* dst_u,
                       uint8* dst_v,
                       int width) {
  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b1 = src_argb1555[2] & 0x1f;
    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b3 = next_argb1555[2] & 0x1f;
    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
    uint8 g = (g0 + g1 + g2 + g3);
    uint8 r = (r0 + r1 + r2 + r3);
    b = (b << 1) | (b >> 6);  // 777 -> 888.
    g = (g << 1) | (g >> 6);
    r = (r << 1) | (r >> 6);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    // Mask off the alpha bit and shift by 2, the same as r0. The previous
    // ">> 3" was the RGB565 formula: it pulled the alpha bit into red and
    // dropped the lowest red bit.
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b = (b0 + b2);  // 555 * 2 = 666.
    uint8 g = (g0 + g2);
    uint8 r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
586 
// Computes one U and one V sample for every 2x2 block of ARGB4444 pixels
// taken from two rows: src_argb4444 and the row src_stride_argb4444 bytes
// after it. Channel nibbles of the block are summed rather than averaged, and
// the sums are then widened to 8 bits by bit replication before RGBToU /
// RGBToV. An odd trailing column uses the two vertically adjacent pixels
// only. Alpha is ignored.
void ARGB4444ToUVRow_C(const uint8* src_argb4444,
                       int src_stride_argb4444,
                       uint8* dst_u,
                       uint8* dst_v,
                       int width) {
  const uint8* row0 = src_argb4444;
  const uint8* row1 = src_argb4444 + src_stride_argb4444;
  int pairs;
  for (pairs = width / 2; pairs > 0; --pairs) {
    // Byte 0 of a pixel: blue (low nibble), green (high nibble).
    // Byte 1 of a pixel: red (low nibble), alpha (high nibble).
    // 444 * 4 = 666: sums of four nibbles are 6 bits wide.
    uint8 b = (uint8)((row0[0] & 0x0f) + (row0[2] & 0x0f) +
                      (row1[0] & 0x0f) + (row1[2] & 0x0f));
    uint8 g = (uint8)((row0[0] >> 4) + (row0[2] >> 4) +
                      (row1[0] >> 4) + (row1[2] >> 4));
    uint8 r = (uint8)((row0[1] & 0x0f) + (row0[3] & 0x0f) +
                      (row1[1] & 0x0f) + (row1[3] & 0x0f));
    // 666 -> 888.
    b = (uint8)((b << 2) | (b >> 4));
    g = (uint8)((g << 2) | (g >> 4));
    r = (uint8)((r << 2) | (r >> 4));
    *dst_u++ = RGBToU(r, g, b);
    *dst_v++ = RGBToV(r, g, b);
    row0 += 4;
    row1 += 4;
  }
  if (width & 1) {
    // 444 * 2 = 555: sums of two nibbles are 5 bits wide.
    uint8 b = (uint8)((row0[0] & 0x0f) + (row1[0] & 0x0f));
    uint8 g = (uint8)((row0[0] >> 4) + (row1[0] >> 4));
    uint8 r = (uint8)((row0[1] & 0x0f) + (row1[1] & 0x0f));
    // 555 -> 888.
    b = (uint8)((b << 3) | (b >> 2));
    g = (uint8)((g << 3) | (g >> 2));
    r = (uint8)((r << 3) | (r >> 2));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
637 
// Writes one U and one V sample per ARGB pixel (no subsampling), computed
// with RGBToU and RGBToV from the pixel's B, G and R bytes. Alpha is ignored.
void ARGBToUV444Row_C(const uint8* src_argb,
                      uint8* dst_u,
                      uint8* dst_v,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    *dst_u++ = RGBToU(red, green, blue);
    *dst_v++ = RGBToV(red, green, blue);
    src_argb += 4;
  }
}
654 
// Converts a row of ARGB to gray: B, G and R of each output pixel are all set
// to the pixel's RGBToYJ luma, and alpha is copied from the source.
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 luma = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
665 
// Converts a row of ARGB to sepia tone, in place. Each output colour channel
// is a weighted sum of the input B, G and R (7-bit fixed point). Alpha is
// left untouched.
void ARGBSepiaRow_C(uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int sepia_b = (blue * 17 + green * 68 + red * 35) >> 7;
    const int sepia_g = (blue * 22 + green * 88 + red * 45) >> 7;
    const int sepia_r = (blue * 24 + green * 98 + red * 50) >> 7;
    // The blue weights sum to 120 (< 128), so sepia_b cannot exceed 255.
    // Green and red can, and are clamped.
    dst_argb[0] = sepia_b;
    dst_argb[1] = clamp255(sepia_g);
    dst_argb[2] = clamp255(sepia_r);
    dst_argb += 4;
  }
}
683 
// Applies a 4x4 colour matrix to a row of ARGB. The matrix is signed.
// matrix_argb holds 16 coefficients in 6-bit fixed point: entries 0-3 produce
// the output B, 4-7 the output G, 8-11 the output R and 12-15 the output A,
// each as a weighted sum of the input B, G, R, A. Results are clamped to
// [0, 255].
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8* src_argb,
                          uint8* dst_argb,
                          const int8* matrix_argb,
                          int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int b = src_argb[0];
    const int g = src_argb[1];
    const int r = src_argb[2];
    const int a = src_argb[3];
    int scaled[4];
    int ch;
    // Compute all four outputs before storing any of them.
    for (ch = 0; ch < 4; ++ch) {
      const int8* coeff = matrix_argb + ch * 4;
      scaled[ch] =
          (b * coeff[0] + g * coeff[1] + r * coeff[2] + a * coeff[3]) >> 6;
    }
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = Clamp(scaled[ch]);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
716 
// Applies a colour lookup table to a row of ARGB, in place.
// table_argb is indexed as value * 4 + channel, so it interleaves four
// per-channel tables (B, G, R, A) and is read at offsets up to
// 255 * 4 + 3.
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int alpha = dst_argb[3];
    const uint8* entry_b = table_argb + blue * 4;
    const uint8* entry_g = table_argb + green * 4;
    const uint8* entry_r = table_argb + red * 4;
    const uint8* entry_a = table_argb + alpha * 4;
    dst_argb[0] = entry_b[0];
    dst_argb[1] = entry_g[1];
    dst_argb[2] = entry_r[2];
    dst_argb[3] = entry_a[3];
    dst_argb += 4;
  }
}
732 
// Applies a colour lookup table to the B, G and R bytes of a row of ARGB, in
// place; alpha is left untouched.
// table_argb is indexed as value * 4 + channel, i.e. it has the same
// interleaved layout used by ARGBColorTableRow_C, with the alpha entries
// unused here.
void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const uint8* entry_b = table_argb + blue * 4;
    const uint8* entry_g = table_argb + green * 4;
    const uint8* entry_r = table_argb + red * 4;
    dst_argb[0] = entry_b[0];
    dst_argb[1] = entry_g[1];
    dst_argb[2] = entry_r[2];
    dst_argb += 4;
  }
}
746 
// Quantizes the B, G and R bytes of a row of ARGB in place; alpha is left
// untouched. Each value v becomes
//   ((v * scale) >> 16) * interval_size + interval_offset
// i.e. scale is a 16.16 fixed-point factor that maps v to an interval index,
// which is then mapped back to a representative value. The result is stored
// truncated to 8 bits.
void ARGBQuantizeRow_C(uint8* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    int ch;
    for (ch = 0; ch < 3; ++ch) {
      const int level = (dst_argb[ch] * scale) >> 16;
      dst_argb[ch] = (uint8)(level * interval_size + interval_offset);
    }
    dst_argb += 4;
  }
}
763 
// Multiplies every channel of a row of ARGB by the matching byte of value
// (B in the low byte, then G, R, and A in the high byte).
// Both the pixel byte and the scale byte are widened to 16 bits by repeating
// the byte (x * 0x0101); the top 8 bits of the 32-bit product are the result,
// so a scale byte of 255 leaves the channel unchanged.
void ARGBShadeRow_C(const uint8* src_argb,
                    uint8* dst_argb,
                    int width,
                    uint32 value) {
  const uint32 scale_b = (value & 0xff) * 0x0101u;
  const uint32 scale_g = ((value >> 8) & 0xff) * 0x0101u;
  const uint32 scale_r = ((value >> 16) & 0xff) * 0x0101u;
  const uint32 scale_a = (value >> 24) * 0x0101u;

  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint32 b = src_argb[0] * 0x0101u;
    const uint32 g = src_argb[1] * 0x0101u;
    const uint32 r = src_argb[2] * 0x0101u;
    const uint32 a = src_argb[3] * 0x0101u;
    dst_argb[0] = (uint8)((scale_b * b) >> 24);
    dst_argb[1] = (uint8)((scale_g * g) >> 24);
    dst_argb[2] = (uint8)((scale_r * r) >> 24);
    dst_argb[3] = (uint8)((scale_a * a) >> 24);
    src_argb += 4;
    dst_argb += 4;
  }
}
792 
793 #define REPEAT8(v) (v) | ((v) << 8)
794 #define SHADE(f, v) v* f >> 16
795 
ARGBMultiplyRow_C(const uint8 * src_argb0,const uint8 * src_argb1,uint8 * dst_argb,int width)796 void ARGBMultiplyRow_C(const uint8* src_argb0,
797                        const uint8* src_argb1,
798                        uint8* dst_argb,
799                        int width) {
800   int i;
801   for (i = 0; i < width; ++i) {
802     const uint32 b = REPEAT8(src_argb0[0]);
803     const uint32 g = REPEAT8(src_argb0[1]);
804     const uint32 r = REPEAT8(src_argb0[2]);
805     const uint32 a = REPEAT8(src_argb0[3]);
806     const uint32 b_scale = src_argb1[0];
807     const uint32 g_scale = src_argb1[1];
808     const uint32 r_scale = src_argb1[2];
809     const uint32 a_scale = src_argb1[3];
810     dst_argb[0] = SHADE(b, b_scale);
811     dst_argb[1] = SHADE(g, g_scale);
812     dst_argb[2] = SHADE(r, r_scale);
813     dst_argb[3] = SHADE(a, a_scale);
814     src_argb0 += 4;
815     src_argb1 += 4;
816     dst_argb += 4;
817   }
818 }
819 #undef REPEAT8
820 #undef SHADE
821 
822 #define SHADE(f, v) clamp255(v + f)
823 
ARGBAddRow_C(const uint8 * src_argb0,const uint8 * src_argb1,uint8 * dst_argb,int width)824 void ARGBAddRow_C(const uint8* src_argb0,
825                   const uint8* src_argb1,
826                   uint8* dst_argb,
827                   int width) {
828   int i;
829   for (i = 0; i < width; ++i) {
830     const int b = src_argb0[0];
831     const int g = src_argb0[1];
832     const int r = src_argb0[2];
833     const int a = src_argb0[3];
834     const int b_add = src_argb1[0];
835     const int g_add = src_argb1[1];
836     const int r_add = src_argb1[2];
837     const int a_add = src_argb1[3];
838     dst_argb[0] = SHADE(b, b_add);
839     dst_argb[1] = SHADE(g, g_add);
840     dst_argb[2] = SHADE(r, r_add);
841     dst_argb[3] = SHADE(a, a_add);
842     src_argb0 += 4;
843     src_argb1 += 4;
844     dst_argb += 4;
845   }
846 }
847 #undef SHADE
848 
849 #define SHADE(f, v) clamp0(f - v)
850 
ARGBSubtractRow_C(const uint8 * src_argb0,const uint8 * src_argb1,uint8 * dst_argb,int width)851 void ARGBSubtractRow_C(const uint8* src_argb0,
852                        const uint8* src_argb1,
853                        uint8* dst_argb,
854                        int width) {
855   int i;
856   for (i = 0; i < width; ++i) {
857     const int b = src_argb0[0];
858     const int g = src_argb0[1];
859     const int r = src_argb0[2];
860     const int a = src_argb0[3];
861     const int b_sub = src_argb1[0];
862     const int g_sub = src_argb1[1];
863     const int r_sub = src_argb1[2];
864     const int a_sub = src_argb1[3];
865     dst_argb[0] = SHADE(b, b_sub);
866     dst_argb[1] = SHADE(g, g_sub);
867     dst_argb[2] = SHADE(r, r_sub);
868     dst_argb[3] = SHADE(a, a_sub);
869     src_argb0 += 4;
870     src_argb1 += 4;
871     dst_argb += 4;
872   }
873 }
874 #undef SHADE
875 
876 // Sobel functions which mimics SSSE3.
SobelXRow_C(const uint8 * src_y0,const uint8 * src_y1,const uint8 * src_y2,uint8 * dst_sobelx,int width)877 void SobelXRow_C(const uint8* src_y0,
878                  const uint8* src_y1,
879                  const uint8* src_y2,
880                  uint8* dst_sobelx,
881                  int width) {
882   int i;
883   for (i = 0; i < width; ++i) {
884     int a = src_y0[i];
885     int b = src_y1[i];
886     int c = src_y2[i];
887     int a_sub = src_y0[i + 2];
888     int b_sub = src_y1[i + 2];
889     int c_sub = src_y2[i + 2];
890     int a_diff = a - a_sub;
891     int b_diff = b - b_sub;
892     int c_diff = c - c_sub;
893     int sobel = Abs(a_diff + b_diff * 2 + c_diff);
894     dst_sobelx[i] = (uint8)(clamp255(sobel));
895   }
896 }
897 
// Vertical Sobel over two rows.  For each output i:
//   dst[i] = min(255, |d0 + 2 * d1 + d2|), where dK = src_y0[i + K] - src_y1[i + K]
// Each source row is therefore read up to index width + 1.
void SobelYRow_C(const uint8* src_y0,
                 const uint8* src_y1,
                 uint8* dst_sobely,
                 int width) {
  int col;
  for (col = 0; col < width; ++col) {
    // Upper row minus lower row at three neighbouring columns.
    const int left_delta = src_y0[col + 0] - src_y1[col + 0];
    const int centre_delta = src_y0[col + 1] - src_y1[col + 1];
    const int right_delta = src_y0[col + 2] - src_y1[col + 2];
    const int magnitude = Abs(left_delta + centre_delta * 2 + right_delta);
    dst_sobely[col] = (uint8)(clamp255(magnitude));
  }
}
917 
// Combine Sobel X and Sobel Y rows into grey ARGB pixels: the saturated sum
// sobelx + sobely is written to B, G and R, and alpha is set to 255.
void SobelRow_C(const uint8* src_sobelx,
                const uint8* src_sobely,
                uint8* dst_argb,
                int width) {
  uint8* pixel = dst_argb;
  int col;
  for (col = 0; col < width; ++col) {
    const int sum = clamp255(src_sobelx[col] + src_sobely[col]);
    const uint8 grey = (uint8)(sum);
    pixel[0] = grey;
    pixel[1] = grey;
    pixel[2] = grey;
    pixel[3] = (uint8)(255u);
    pixel += 4;
  }
}
934 
// Combine Sobel X and Sobel Y rows into a single 8-bit plane:
//   dst_y[i] = min(255, src_sobelx[i] + src_sobely[i])
void SobelToPlaneRow_C(const uint8* src_sobelx,
                       const uint8* src_sobely,
                       uint8* dst_y,
                       int width) {
  int col = 0;
  while (col < width) {
    const int sum = src_sobelx[col] + src_sobely[col];
    dst_y[col] = (uint8)(clamp255(sum));
    ++col;
  }
}
947 
// Pack Sobel X and Sobel Y into ARGB pixels: B = sobely, R = sobelx,
// G = the saturated sum of both, and alpha = 255.
void SobelXYRow_C(const uint8* src_sobelx,
                  const uint8* src_sobely,
                  uint8* dst_argb,
                  int width) {
  uint8* pixel = dst_argb;
  int col;
  for (col = 0; col < width; ++col) {
    const int grad_x = src_sobelx[col];
    const int grad_y = src_sobely[col];
    const int combined = clamp255(grad_x + grad_y);
    pixel[0] = (uint8)(grad_y);
    pixel[1] = (uint8)(combined);
    pixel[2] = (uint8)(grad_x);
    pixel[3] = (uint8)(255u);
    pixel += 4;
  }
}
964 
// Expand a row of 8-bit Y samples to ARGB: the Y value is copied unchanged
// into B, G and R, and alpha is set to 255.
void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 grey = *src_y++;
    dst_argb[0] = grey;
    dst_argb[1] = grey;
    dst_argb[2] = grey;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
976 
977 // TODO(fbarchard): Unify these structures to be platform independent.
978 // TODO(fbarchard): Generate SIMD structures from float matrix.
979 
980 // BT.601 YUV to RGB reference
981 //  R = (Y - 16) * 1.164              - V * -1.596
982 //  G = (Y - 16) * 1.164 - U *  0.391 - V *  0.813
983 //  B = (Y - 16) * 1.164 - U * -2.018
984 
985 // Y contribution to R,G,B.  Scale and bias.
986 #define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
987 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
988 
989 // U and V contributions to R,G,B.
990 #define UB -128 /* max(-128, round(-2.018 * 64)) */
991 #define UG 25   /* round(0.391 * 64) */
992 #define VG 52   /* round(0.813 * 64) */
993 #define VR -102 /* round(-1.596 * 64) */
994 
995 // Bias values to subtract 16 from Y and 128 from U and V.
996 #define BB (UB * 128 + YGB)
997 #define BG (UG * 128 + VG * 128 + YGB)
998 #define BR (VR * 128 + YGB)
999 
1000 #if defined(__aarch64__)  // 64 bit arm
1001 const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
1002     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1003     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1004     {UG, VG, UG, VG, UG, VG, UG, VG},
1005     {UG, VG, UG, VG, UG, VG, UG, VG},
1006     {BB, BG, BR, 0, 0, 0, 0, 0},
1007     {0x0101 * YG, 0, 0, 0}};
1008 const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
1009     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1010     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1011     {VG, UG, VG, UG, VG, UG, VG, UG},
1012     {VG, UG, VG, UG, VG, UG, VG, UG},
1013     {BR, BG, BB, 0, 0, 0, 0, 0},
1014     {0x0101 * YG, 0, 0, 0}};
1015 #elif defined(__arm__)  // 32 bit arm
1016 const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
1017     {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
1018     {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
1019     {BB, BG, BR, 0, 0, 0, 0, 0},
1020     {0x0101 * YG, 0, 0, 0}};
1021 const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
1022     {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
1023     {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
1024     {BR, BG, BB, 0, 0, 0, 0, 0},
1025     {0x0101 * YG, 0, 0, 0}};
1026 #else
1027 const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
1028     {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
1029      UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
1030     {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
1031      UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
1032     {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
1033      0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
1034     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1035     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1036     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1037     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
1038 const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
1039     {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
1040      VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
1041     {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
1042      VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
1043     {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
1044      0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
1045     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1046     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1047     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1048     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
1049 #endif
1050 
1051 #undef BB
1052 #undef BG
1053 #undef BR
1054 #undef YGB
1055 #undef UB
1056 #undef UG
1057 #undef VG
1058 #undef VR
1059 #undef YG
1060 
1061 // JPEG YUV to RGB reference
1062 // *  R = Y                - V * -1.40200
1063 // *  G = Y - U *  0.34414 - V *  0.71414
1064 // *  B = Y - U * -1.77200
1065 
1066 // Y contribution to R,G,B.  Scale and bias.
1067 #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
1068 #define YGB 32   /* 64 / 2 */
1069 
1070 // U and V contributions to R,G,B.
1071 #define UB -113 /* round(-1.77200 * 64) */
1072 #define UG 22   /* round(0.34414 * 64) */
1073 #define VG 46   /* round(0.71414  * 64) */
1074 #define VR -90  /* round(-1.40200 * 64) */
1075 
1076 // Bias values to round, and subtract 128 from U and V.
1077 #define BB (UB * 128 + YGB)
1078 #define BG (UG * 128 + VG * 128 + YGB)
1079 #define BR (VR * 128 + YGB)
1080 
1081 #if defined(__aarch64__)
1082 const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
1083     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1084     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1085     {UG, VG, UG, VG, UG, VG, UG, VG},
1086     {UG, VG, UG, VG, UG, VG, UG, VG},
1087     {BB, BG, BR, 0, 0, 0, 0, 0},
1088     {0x0101 * YG, 0, 0, 0}};
1089 const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
1090     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1091     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1092     {VG, UG, VG, UG, VG, UG, VG, UG},
1093     {VG, UG, VG, UG, VG, UG, VG, UG},
1094     {BR, BG, BB, 0, 0, 0, 0, 0},
1095     {0x0101 * YG, 0, 0, 0}};
1096 #elif defined(__arm__)
1097 const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
1098     {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
1099     {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
1100     {BB, BG, BR, 0, 0, 0, 0, 0},
1101     {0x0101 * YG, 0, 0, 0}};
1102 const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
1103     {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
1104     {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
1105     {BR, BG, BB, 0, 0, 0, 0, 0},
1106     {0x0101 * YG, 0, 0, 0}};
1107 #else
1108 const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
1109     {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
1110      UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
1111     {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
1112      UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
1113     {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
1114      0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
1115     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1116     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1117     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1118     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
1119 const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
1120     {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
1121      VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
1122     {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
1123      VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
1124     {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
1125      0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
1126     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1127     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1128     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1129     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
1130 #endif
1131 
1132 #undef BB
1133 #undef BG
1134 #undef BR
1135 #undef YGB
1136 #undef UB
1137 #undef UG
1138 #undef VG
1139 #undef VR
1140 #undef YG
1141 
1142 // BT.709 YUV to RGB reference
1143 //  R = (Y - 16) * 1.164              - V * -1.793
1144 //  G = (Y - 16) * 1.164 - U *  0.213 - V *  0.533
1145 //  B = (Y - 16) * 1.164 - U * -2.112
1146 // See also http://www.equasys.de/colorconversion.html
1147 
1148 // Y contribution to R,G,B.  Scale and bias.
1149 #define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
1150 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1151 
1152 // TODO(fbarchard): Find way to express 2.112 instead of 2.0.
1153 // U and V contributions to R,G,B.
1154 #define UB -128 /* max(-128, round(-2.112 * 64)) */
1155 #define UG 14   /* round(0.213 * 64) */
1156 #define VG 34   /* round(0.533  * 64) */
1157 #define VR -115 /* round(-1.793 * 64) */
1158 
1159 // Bias values to round, and subtract 128 from U and V.
1160 #define BB (UB * 128 + YGB)
1161 #define BG (UG * 128 + VG * 128 + YGB)
1162 #define BR (VR * 128 + YGB)
1163 
1164 #if defined(__aarch64__)
1165 const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
1166     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1167     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1168     {UG, VG, UG, VG, UG, VG, UG, VG},
1169     {UG, VG, UG, VG, UG, VG, UG, VG},
1170     {BB, BG, BR, 0, 0, 0, 0, 0},
1171     {0x0101 * YG, 0, 0, 0}};
1172 const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
1173     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1174     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1175     {VG, UG, VG, UG, VG, UG, VG, UG},
1176     {VG, UG, VG, UG, VG, UG, VG, UG},
1177     {BR, BG, BB, 0, 0, 0, 0, 0},
1178     {0x0101 * YG, 0, 0, 0}};
1179 #elif defined(__arm__)
1180 const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
1181     {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
1182     {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
1183     {BB, BG, BR, 0, 0, 0, 0, 0},
1184     {0x0101 * YG, 0, 0, 0}};
1185 const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
1186     {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
1187     {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
1188     {BR, BG, BB, 0, 0, 0, 0, 0},
1189     {0x0101 * YG, 0, 0, 0}};
1190 #else
1191 const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
1192     {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
1193      UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
1194     {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
1195      UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
1196     {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
1197      0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
1198     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1199     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1200     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1201     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
1202 const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
1203     {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
1204      VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
1205     {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
1206      VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
1207     {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
1208      0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
1209     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1210     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1211     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1212     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
1213 #endif
1214 
1215 #undef BB
1216 #undef BG
1217 #undef BR
1218 #undef YGB
1219 #undef UB
1220 #undef UG
1221 #undef VG
1222 #undef VR
1223 #undef YG
1224 
1225 // C reference code that mimics the YUV assembly.
YuvPixel(uint8 y,uint8 u,uint8 v,uint8 * b,uint8 * g,uint8 * r,const struct YuvConstants * yuvconstants)1226 static __inline void YuvPixel(uint8 y,
1227                               uint8 u,
1228                               uint8 v,
1229                               uint8* b,
1230                               uint8* g,
1231                               uint8* r,
1232                               const struct YuvConstants* yuvconstants) {
1233 #if defined(__aarch64__)
1234   int ub = -yuvconstants->kUVToRB[0];
1235   int ug = yuvconstants->kUVToG[0];
1236   int vg = yuvconstants->kUVToG[1];
1237   int vr = -yuvconstants->kUVToRB[1];
1238   int bb = yuvconstants->kUVBiasBGR[0];
1239   int bg = yuvconstants->kUVBiasBGR[1];
1240   int br = yuvconstants->kUVBiasBGR[2];
1241   int yg = yuvconstants->kYToRgb[0] / 0x0101;
1242 #elif defined(__arm__)
1243   int ub = -yuvconstants->kUVToRB[0];
1244   int ug = yuvconstants->kUVToG[0];
1245   int vg = yuvconstants->kUVToG[4];
1246   int vr = -yuvconstants->kUVToRB[4];
1247   int bb = yuvconstants->kUVBiasBGR[0];
1248   int bg = yuvconstants->kUVBiasBGR[1];
1249   int br = yuvconstants->kUVBiasBGR[2];
1250   int yg = yuvconstants->kYToRgb[0] / 0x0101;
1251 #else
1252   int ub = yuvconstants->kUVToB[0];
1253   int ug = yuvconstants->kUVToG[0];
1254   int vg = yuvconstants->kUVToG[1];
1255   int vr = yuvconstants->kUVToR[1];
1256   int bb = yuvconstants->kUVBiasB[0];
1257   int bg = yuvconstants->kUVBiasG[0];
1258   int br = yuvconstants->kUVBiasR[0];
1259   int yg = yuvconstants->kYToRgb[0];
1260 #endif
1261 
1262   uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16;
1263   *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6);
1264   *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6);
1265   *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
1266 }
1267 
1268 // Y contribution to R,G,B.  Scale and bias.
1269 #define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
1270 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1271 
1272 // C reference code that mimics the YUV assembly.
YPixel(uint8 y,uint8 * b,uint8 * g,uint8 * r)1273 static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
1274   uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
1275   *b = Clamp((int32)(y1 + YGB) >> 6);
1276   *g = Clamp((int32)(y1 + YGB) >> 6);
1277   *r = Clamp((int32)(y1 + YGB) >> 6);
1278 }
1279 
1280 #undef YG
1281 #undef YGB
1282 
1283 #if !defined(LIBYUV_DISABLE_NEON) && \
1284     (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
1285 // C mimic assembly.
1286 // TODO(fbarchard): Remove subsampling from Neon.
I444ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,const struct YuvConstants * yuvconstants,int width)1287 void I444ToARGBRow_C(const uint8* src_y,
1288                      const uint8* src_u,
1289                      const uint8* src_v,
1290                      uint8* rgb_buf,
1291                      const struct YuvConstants* yuvconstants,
1292                      int width) {
1293   int x;
1294   for (x = 0; x < width - 1; x += 2) {
1295     uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
1296     uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
1297     YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
1298              yuvconstants);
1299     rgb_buf[3] = 255;
1300     YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
1301              yuvconstants);
1302     rgb_buf[7] = 255;
1303     src_y += 2;
1304     src_u += 2;
1305     src_v += 2;
1306     rgb_buf += 8;  // Advance 2 pixels.
1307   }
1308   if (width & 1) {
1309     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1310              rgb_buf + 2, yuvconstants);
1311     rgb_buf[3] = 255;
1312   }
1313 }
1314 #else
I444ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,const struct YuvConstants * yuvconstants,int width)1315 void I444ToARGBRow_C(const uint8* src_y,
1316                      const uint8* src_u,
1317                      const uint8* src_v,
1318                      uint8* rgb_buf,
1319                      const struct YuvConstants* yuvconstants,
1320                      int width) {
1321   int x;
1322   for (x = 0; x < width; ++x) {
1323     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1324              rgb_buf + 2, yuvconstants);
1325     rgb_buf[3] = 255;
1326     src_y += 1;
1327     src_u += 1;
1328     src_v += 1;
1329     rgb_buf += 4;  // Advance 1 pixel.
1330   }
1331 }
1332 #endif
1333 
1334 // Also used for 420
I422ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,const struct YuvConstants * yuvconstants,int width)1335 void I422ToARGBRow_C(const uint8* src_y,
1336                      const uint8* src_u,
1337                      const uint8* src_v,
1338                      uint8* rgb_buf,
1339                      const struct YuvConstants* yuvconstants,
1340                      int width) {
1341   int x;
1342   for (x = 0; x < width - 1; x += 2) {
1343     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1344              rgb_buf + 2, yuvconstants);
1345     rgb_buf[3] = 255;
1346     YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1347              rgb_buf + 6, yuvconstants);
1348     rgb_buf[7] = 255;
1349     src_y += 2;
1350     src_u += 1;
1351     src_v += 1;
1352     rgb_buf += 8;  // Advance 2 pixels.
1353   }
1354   if (width & 1) {
1355     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1356              rgb_buf + 2, yuvconstants);
1357     rgb_buf[3] = 255;
1358   }
1359 }
1360 
// Convert a row of Y plus half-width U and V to ARGB, taking alpha from a
// separate full-width plane.  Each U/V sample is shared by two horizontally
// adjacent pixels; every pixel copies its own src_a byte.
void I422AlphaToARGBRow_C(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          const uint8* src_a,
                          uint8* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], *src_u, *src_v, &rgb_buf[0], &rgb_buf[1], &rgb_buf[2],
             yuvconstants);
    rgb_buf[3] = src_a[0];
    YuvPixel(src_y[1], *src_u, *src_v, &rgb_buf[4], &rgb_buf[5], &rgb_buf[6],
             yuvconstants);
    rgb_buf[7] = src_a[1];
    src_y += 2;
    ++src_u;
    ++src_v;
    src_a += 2;
    rgb_buf += 8;  // Two pixels written.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], *src_u, *src_v, &rgb_buf[0], &rgb_buf[1], &rgb_buf[2],
             yuvconstants);
    rgb_buf[3] = src_a[0];
  }
}
1388 
// Convert a row of Y plus half-width U and V to packed 3-byte pixels written
// in B, G, R order.  Each U/V sample is shared by two horizontally adjacent
// pixels.
void I422ToRGB24Row_C(const uint8* src_y,
                      const uint8* src_u,
                      const uint8* src_v,
                      uint8* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], *src_u, *src_v, &rgb_buf[0], &rgb_buf[1], &rgb_buf[2],
             yuvconstants);
    YuvPixel(src_y[1], *src_u, *src_v, &rgb_buf[3], &rgb_buf[4], &rgb_buf[5],
             yuvconstants);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 6;  // Two 3-byte pixels written.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], *src_u, *src_v, &rgb_buf[0], &rgb_buf[1], &rgb_buf[2],
             yuvconstants);
  }
}
1411 
// Convert a row of Y plus half-width U and V to 16-bit pixels with 4 bits per
// channel: B in bits 0-3, G in bits 4-7, R in bits 8-11 and the top 4 bits
// set.  Two pixels are stored at a time as one native 32-bit word (first
// pixel in the low half); an odd trailing pixel is stored as a 16-bit word.
void I422ToARGB4444Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8 b0, g0, r0;
  uint8 b1, g1, r1;
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], *src_u, *src_v, &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], *src_u, *src_v, &b1, &g1, &r1, yuvconstants);
    // Keep the top 4 bits of every channel and pack both pixels.
    *(uint32*)(dst_argb4444) =
        (b0 >> 4) | ((g0 >> 4) << 4) | ((r0 >> 4) << 8) | ((b1 >> 4) << 16) |
        ((g1 >> 4) << 20) | ((r1 >> 4) << 24) | 0xf000f000;
    src_y += 2;
    ++src_u;
    ++src_v;
    dst_argb4444 += 4;  // Two 16-bit pixels written.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], *src_u, *src_v, &b0, &g0, &r0, yuvconstants);
    *(uint16*)(dst_argb4444) =
        (b0 >> 4) | ((g0 >> 4) << 4) | ((r0 >> 4) << 8) | 0xf000;
  }
}
1449 
// Convert a row of Y plus half-width U and V to 16-bit pixels with 5 bits per
// color channel: B in bits 0-4, G in bits 5-9, R in bits 10-14 and bit 15
// set.  Two pixels are stored at a time as one native 32-bit word (first
// pixel in the low half); an odd trailing pixel is stored as a 16-bit word.
void I422ToARGB1555Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8 b0, g0, r0;
  uint8 b1, g1, r1;
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], *src_u, *src_v, &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], *src_u, *src_v, &b1, &g1, &r1, yuvconstants);
    // Keep the top 5 bits of every channel and pack both pixels.
    *(uint32*)(dst_argb1555) =
        (b0 >> 3) | ((g0 >> 3) << 5) | ((r0 >> 3) << 10) | ((b1 >> 3) << 16) |
        ((g1 >> 3) << 21) | ((r1 >> 3) << 26) | 0x80008000;
    src_y += 2;
    ++src_u;
    ++src_v;
    dst_argb1555 += 4;  // Two 16-bit pixels written.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], *src_u, *src_v, &b0, &g0, &r0, yuvconstants);
    *(uint16*)(dst_argb1555) =
        (b0 >> 3) | ((g0 >> 3) << 5) | ((r0 >> 3) << 10) | 0x8000;
  }
}
1487 
// Convert a row of Y plus half-width U and V to RGB565.
//   src_y:        Y samples, one per pixel.
//   src_u, src_v: chroma samples, one per two horizontally adjacent pixels.
//   dst_rgb565:   output, 2 bytes per pixel: B in bits 0-4, G in bits 5-10,
//                 R in bits 11-15.
//   yuvconstants: conversion coefficients passed through to YuvPixel().
//   width:        number of pixels to convert.
// Two pixels are stored at a time as one native 32-bit word (first pixel in
// the low half); an odd trailing pixel is stored as a 16-bit word.
// NOTE(review): the word stores assume dst_rgb565 is suitably aligned for
// the target - confirm against callers.
void I422ToRGB565Row_C(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    // Pack in unsigned arithmetic: r1 can be as large as 31, and shifting an
    // int-promoted value >= 16 left by 27 would overflow a signed int, which
    // is undefined behavior in C.
    *(uint32*)(dst_rgb565) = (uint32)b0 | ((uint32)g0 << 5) |
                             ((uint32)r0 << 11) | ((uint32)b1 << 16) |
                             ((uint32)g1 << 21) | ((uint32)r1 << 27);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16*)(dst_rgb565) = (uint16)(b0 | (g0 << 5) | (r0 << 11));
  }
}
1525 
// Convert a row of Y plus interleaved half-width chroma (U byte first, then
// V) to ARGB with alpha = 255.  Each U,V pair is shared by two horizontally
// adjacent pixels.
void NV12ToARGBRow_C(const uint8* src_y,
                     const uint8* src_uv,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;
    rgb_buf += 8;  // Two pixels written.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1549 
// Convert a row of Y plus interleaved half-width chroma (V byte first, then
// U) to ARGB with alpha = 255.  Each V,U pair is shared by two horizontally
// adjacent pixels.
void NV21ToARGBRow_C(const uint8* src_y,
                     const uint8* src_vu,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    // U is the second byte of each pair, V the first.
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_vu[1], src_vu[0], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // Two pixels written.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1573 
// Converts one row of NV12 to RGB565 (2 bytes per pixel).
// Each pixel is computed with YuvPixel, reduced to 5/6/5 bits and packed as
// b | (g << 5) | (r << 11). The 16-bit value is stored low byte first, which
// is what the previous word stores produced on little-endian hosts.
//
// Compared to storing through uint32*/uint16* casts, the byte stores:
//  - avoid left-shifting a promoted int into the sign bit (r << 27),
//  - do not require dst_rgb565 to be aligned,
//  - do not depend on host byte order.
void NV12ToRGB565Row_C(const uint8* src_y,
                       const uint8* src_uv,
                       uint8* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  uint32 p0;
  uint32 p1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Both pixels of the pair use the same chroma pair.
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
    p0 = (uint32)(b0 >> 3) | ((uint32)(g0 >> 2) << 5) |
         ((uint32)(r0 >> 3) << 11);
    p1 = (uint32)(b1 >> 3) | ((uint32)(g1 >> 2) << 5) |
         ((uint32)(r1 >> 3) << 11);
    dst_rgb565[0] = (uint8)(p0 & 255);
    dst_rgb565[1] = (uint8)(p0 >> 8);
    dst_rgb565[2] = (uint8)(p1 & 255);
    dst_rgb565[3] = (uint8)(p1 >> 8);
    src_y += 2;
    src_uv += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Odd width: one trailing pixel.
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    p0 = (uint32)(b0 >> 3) | ((uint32)(g0 >> 2) << 5) |
         ((uint32)(r0 >> 3) << 11);
    dst_rgb565[0] = (uint8)(p0 & 255);
    dst_rgb565[1] = (uint8)(p0 >> 8);
  }
}
1609 
// Converts one row of packed YUY2 to 4-byte output pixels via YuvPixel.
// Every 4 source bytes describe two pixels: bytes 0 and 2 are passed as the
// first YuvPixel argument (one per pixel), bytes 1 and 3 as the shared second
// and third arguments. Byte 3 of each output pixel is set to 255.
void YUY2ToARGBRow_C(const uint8* src_yuy2,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // 2 output pixels.
    remaining -= 2;
  }
  // Odd width: the last pixel still reads bytes 1 and 3 of its 4-byte group.
  if (remaining & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1631 
// Converts one row of packed UYVY to 4-byte output pixels via YuvPixel.
// Every 4 source bytes describe two pixels: bytes 1 and 3 are passed as the
// first YuvPixel argument (one per pixel), bytes 0 and 2 as the shared second
// and third arguments. Byte 3 of each output pixel is set to 255.
void UYVYToARGBRow_C(const uint8* src_uyvy,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // 2 output pixels.
    remaining -= 2;
  }
  // Odd width: one trailing pixel.
  if (remaining & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1653 
// Converts one row of planar Y, U, V to 4-byte output pixels via YuvPixel.
// One U and one V byte are consumed per two Y bytes. YuvPixel fills bytes
// 1..3 of each output pixel and byte 0 is set to 255.
void I422ToRGBARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  // Two pixels per pass; both use the same U and V sample.
  while (remaining > 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[5], &rgb_buf[6],
             &rgb_buf[7], yuvconstants);
    rgb_buf[4] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // 2 output pixels.
    remaining -= 2;
  }
  // Odd width: one trailing pixel.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
  }
}
1679 
// Converts one row of single-plane Y samples to 4-byte output pixels.
// YPixel fills bytes 0..2 of each output pixel from one Y byte; byte 3 is set
// to 255.
void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
  int remaining = width;
  while (remaining > 1) {
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
    YPixel(src_y[1], &rgb_buf[4], &rgb_buf[5], &rgb_buf[6]);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // 2 output pixels.
    remaining -= 2;
  }
  // Odd width: one trailing pixel.
  if (remaining & 1) {
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
  }
}
1695 
// Writes the bytes of src to dst in reverse order (dst[i] = src[width-1-i]).
void MirrorRow_C(const uint8* src, uint8* dst, int width) {
  const uint8* in = src + width - 1;  // Last source byte.
  uint8* out = dst;
  int remaining = width;
  // Walk the source backwards two bytes at a time.
  while (remaining > 1) {
    out[0] = in[0];
    out[1] = in[-1];
    in -= 2;
    out += 2;
    remaining -= 2;
  }
  // Odd width: the middle-to-first leftover byte.
  if (remaining & 1) {
    dst[width - 1] = in[0];
  }
}
1708 
// Reverses a row of interleaved byte pairs while splitting it into two planes:
// the first byte of each pair goes to dst_u, the second to dst_v.
// width is the number of pairs.
void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  const uint8* in = src_uv + ((width - 1) << 1);  // Last source pair.
  int x = 0;
  // Walk the source backwards two pairs at a time.
  while (x + 1 < width) {
    dst_u[x] = in[0];
    dst_u[x + 1] = in[-2];
    dst_v[x] = in[1];
    dst_v[x + 1] = in[-1];
    in -= 4;
    x += 2;
  }
  // Odd width: one leftover pair.
  if (width & 1) {
    dst_u[width - 1] = in[0];
    dst_v[width - 1] = in[1];
  }
}
1724 
// Reverses the order of the 4-byte pixels in a row. Each pixel is moved as a
// single uint32, so the bytes inside a pixel keep their order.
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
  const uint32* in = (const uint32*)(src) + width - 1;  // Last source pixel.
  uint32* const out_base = (uint32*)(dst);
  uint32* out = out_base;
  int remaining = width;
  // Walk the source backwards two pixels at a time.
  while (remaining > 1) {
    out[0] = in[0];
    out[1] = in[-1];
    in -= 2;
    out += 2;
    remaining -= 2;
  }
  // Odd width: one leftover pixel.
  if (remaining & 1) {
    out_base[width - 1] = in[0];
  }
}
1739 
// De-interleaves a row of byte pairs: the first byte of each pair goes to
// dst_u, the second to dst_v. width is the number of pairs.
void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  int x = 0;
  // Two pairs per pass.
  while (x + 1 < width) {
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[3];
    src_uv += 4;
    x += 2;
  }
  // Odd width: one leftover pair.
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}
1754 
// Interleaves two byte planes into one row of pairs: each output pair is
// (src_u[i], src_v[i]). width is the number of pairs.
void MergeUVRow_C(const uint8* src_u,
                  const uint8* src_v,
                  uint8* dst_uv,
                  int width) {
  const uint8* u = src_u;
  const uint8* v = src_v;
  int remaining = width;
  // Two pairs per pass.
  while (remaining > 1) {
    dst_uv[0] = u[0];
    dst_uv[1] = v[0];
    dst_uv[2] = u[1];
    dst_uv[3] = v[1];
    u += 2;
    v += 2;
    dst_uv += 4;
    remaining -= 2;
  }
  // Odd width: one leftover pair.
  if (remaining & 1) {
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}
1772 
// Copies count bytes from src to dst using memcpy.
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  const size_t num_bytes = count;
  memcpy(dst, src, num_bytes);
}
1776 
// Copies count 16-bit elements from src to dst using memcpy.
// The byte count is computed in size_t so that count * 2 cannot overflow int
// for large counts.
void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
  memcpy(dst, src, (size_t)count * 2);
}
1780 
// Fills width bytes at dst with the value v8 using memset.
void SetRow_C(uint8* dst, uint8 v8, int width) {
  const size_t num_bytes = width;
  memset(dst, v8, num_bytes);
}
1784 
// Fills width 4-byte pixels at dst_argb with the 32-bit value v32.
// The destination is written through a uint32 pointer.
void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) {
  uint32* out = (uint32*)(dst_argb);
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *out++ = v32;
  }
}
1792 
1793 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
YUY2ToUVRow_C(const uint8 * src_yuy2,int src_stride_yuy2,uint8 * dst_u,uint8 * dst_v,int width)1794 void YUY2ToUVRow_C(const uint8* src_yuy2,
1795                    int src_stride_yuy2,
1796                    uint8* dst_u,
1797                    uint8* dst_v,
1798                    int width) {
1799   // Output a row of UV values, filtering 2 rows of YUY2.
1800   int x;
1801   for (x = 0; x < width; x += 2) {
1802     dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
1803     dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
1804     src_yuy2 += 4;
1805     dst_u += 1;
1806     dst_v += 1;
1807   }
1808 }
1809 
1810 // Copy row of YUY2 UV's (422) into U and V (422).
YUY2ToUV422Row_C(const uint8 * src_yuy2,uint8 * dst_u,uint8 * dst_v,int width)1811 void YUY2ToUV422Row_C(const uint8* src_yuy2,
1812                       uint8* dst_u,
1813                       uint8* dst_v,
1814                       int width) {
1815   // Output a row of UV values.
1816   int x;
1817   for (x = 0; x < width; x += 2) {
1818     dst_u[0] = src_yuy2[1];
1819     dst_v[0] = src_yuy2[3];
1820     src_yuy2 += 4;
1821     dst_u += 1;
1822     dst_v += 1;
1823   }
1824 }
1825 
1826 // Copy row of YUY2 Y's (422) into Y (420/422).
YUY2ToYRow_C(const uint8 * src_yuy2,uint8 * dst_y,int width)1827 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
1828   // Output a row of Y values.
1829   int x;
1830   for (x = 0; x < width - 1; x += 2) {
1831     dst_y[x] = src_yuy2[0];
1832     dst_y[x + 1] = src_yuy2[2];
1833     src_yuy2 += 4;
1834   }
1835   if (width & 1) {
1836     dst_y[width - 1] = src_yuy2[0];
1837   }
1838 }
1839 
1840 // Filter 2 rows of UYVY UV's (422) into U and V (420).
UYVYToUVRow_C(const uint8 * src_uyvy,int src_stride_uyvy,uint8 * dst_u,uint8 * dst_v,int width)1841 void UYVYToUVRow_C(const uint8* src_uyvy,
1842                    int src_stride_uyvy,
1843                    uint8* dst_u,
1844                    uint8* dst_v,
1845                    int width) {
1846   // Output a row of UV values.
1847   int x;
1848   for (x = 0; x < width; x += 2) {
1849     dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
1850     dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
1851     src_uyvy += 4;
1852     dst_u += 1;
1853     dst_v += 1;
1854   }
1855 }
1856 
1857 // Copy row of UYVY UV's (422) into U and V (422).
UYVYToUV422Row_C(const uint8 * src_uyvy,uint8 * dst_u,uint8 * dst_v,int width)1858 void UYVYToUV422Row_C(const uint8* src_uyvy,
1859                       uint8* dst_u,
1860                       uint8* dst_v,
1861                       int width) {
1862   // Output a row of UV values.
1863   int x;
1864   for (x = 0; x < width; x += 2) {
1865     dst_u[0] = src_uyvy[0];
1866     dst_v[0] = src_uyvy[2];
1867     src_uyvy += 4;
1868     dst_u += 1;
1869     dst_v += 1;
1870   }
1871 }
1872 
1873 // Copy row of UYVY Y's (422) into Y (420/422).
UYVYToYRow_C(const uint8 * src_uyvy,uint8 * dst_y,int width)1874 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
1875   // Output a row of Y values.
1876   int x;
1877   for (x = 0; x < width - 1; x += 2) {
1878     dst_y[x] = src_uyvy[1];
1879     dst_y[x + 1] = src_uyvy[3];
1880     src_uyvy += 4;
1881   }
1882   if (width & 1) {
1883     dst_y[width - 1] = src_uyvy[1];
1884   }
1885 }
1886 
1887 #define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
1888 
1889 // Blend src_argb0 over src_argb1 and store to dst_argb.
1890 // dst_argb may be src_argb0 or src_argb1.
1891 // This code mimics the SSSE3 version for better testability.
ARGBBlendRow_C(const uint8 * src_argb0,const uint8 * src_argb1,uint8 * dst_argb,int width)1892 void ARGBBlendRow_C(const uint8* src_argb0,
1893                     const uint8* src_argb1,
1894                     uint8* dst_argb,
1895                     int width) {
1896   int x;
1897   for (x = 0; x < width - 1; x += 2) {
1898     uint32 fb = src_argb0[0];
1899     uint32 fg = src_argb0[1];
1900     uint32 fr = src_argb0[2];
1901     uint32 a = src_argb0[3];
1902     uint32 bb = src_argb1[0];
1903     uint32 bg = src_argb1[1];
1904     uint32 br = src_argb1[2];
1905     dst_argb[0] = BLEND(fb, bb, a);
1906     dst_argb[1] = BLEND(fg, bg, a);
1907     dst_argb[2] = BLEND(fr, br, a);
1908     dst_argb[3] = 255u;
1909 
1910     fb = src_argb0[4 + 0];
1911     fg = src_argb0[4 + 1];
1912     fr = src_argb0[4 + 2];
1913     a = src_argb0[4 + 3];
1914     bb = src_argb1[4 + 0];
1915     bg = src_argb1[4 + 1];
1916     br = src_argb1[4 + 2];
1917     dst_argb[4 + 0] = BLEND(fb, bb, a);
1918     dst_argb[4 + 1] = BLEND(fg, bg, a);
1919     dst_argb[4 + 2] = BLEND(fr, br, a);
1920     dst_argb[4 + 3] = 255u;
1921     src_argb0 += 8;
1922     src_argb1 += 8;
1923     dst_argb += 8;
1924   }
1925 
1926   if (width & 1) {
1927     uint32 fb = src_argb0[0];
1928     uint32 fg = src_argb0[1];
1929     uint32 fr = src_argb0[2];
1930     uint32 a = src_argb0[3];
1931     uint32 bb = src_argb1[0];
1932     uint32 bg = src_argb1[1];
1933     uint32 br = src_argb1[2];
1934     dst_argb[0] = BLEND(fb, bb, a);
1935     dst_argb[1] = BLEND(fg, bg, a);
1936     dst_argb[2] = BLEND(fr, br, a);
1937     dst_argb[3] = 255u;
1938   }
1939 }
1940 #undef BLEND
1941 
1942 #define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
BlendPlaneRow_C(const uint8 * src0,const uint8 * src1,const uint8 * alpha,uint8 * dst,int width)1943 void BlendPlaneRow_C(const uint8* src0,
1944                      const uint8* src1,
1945                      const uint8* alpha,
1946                      uint8* dst,
1947                      int width) {
1948   int x;
1949   for (x = 0; x < width - 1; x += 2) {
1950     dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
1951     dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
1952     src0 += 2;
1953     src1 += 2;
1954     alpha += 2;
1955     dst += 2;
1956   }
1957   if (width & 1) {
1958     dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
1959   }
1960 }
1961 #undef UBLEND
1962 
1963 #define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
1964 
1965 // Multiply source RGB by alpha and store to destination.
1966 // This code mimics the SSSE3 version for better testability.
ARGBAttenuateRow_C(const uint8 * src_argb,uint8 * dst_argb,int width)1967 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1968   int i;
1969   for (i = 0; i < width - 1; i += 2) {
1970     uint32 b = src_argb[0];
1971     uint32 g = src_argb[1];
1972     uint32 r = src_argb[2];
1973     uint32 a = src_argb[3];
1974     dst_argb[0] = ATTENUATE(b, a);
1975     dst_argb[1] = ATTENUATE(g, a);
1976     dst_argb[2] = ATTENUATE(r, a);
1977     dst_argb[3] = a;
1978     b = src_argb[4];
1979     g = src_argb[5];
1980     r = src_argb[6];
1981     a = src_argb[7];
1982     dst_argb[4] = ATTENUATE(b, a);
1983     dst_argb[5] = ATTENUATE(g, a);
1984     dst_argb[6] = ATTENUATE(r, a);
1985     dst_argb[7] = a;
1986     src_argb += 8;
1987     dst_argb += 8;
1988   }
1989 
1990   if (width & 1) {
1991     const uint32 b = src_argb[0];
1992     const uint32 g = src_argb[1];
1993     const uint32 r = src_argb[2];
1994     const uint32 a = src_argb[3];
1995     dst_argb[0] = ATTENUATE(b, a);
1996     dst_argb[1] = ATTENUATE(g, a);
1997     dst_argb[2] = ATTENUATE(r, a);
1998     dst_argb[3] = a;
1999   }
2000 }
2001 #undef ATTENUATE
2002 
// Reciprocal table used to divide RGB by alpha (unattenuate).
// The exact operation being approximated is:
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values, e.g. 125.
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
//
// T(a) builds one entry: 0x0100 (1.0 in 8.8 fixed point) in the upper 16 bits
// and 0x10000 / a in the lower 16 bits. Three entries are written by hand:
//   [0]    lower half is 0 (no division by zero).
//   [1]    lower half is 0xffff, because 0x10000 / 1 does not fit in 16 bits.
//   [255]  lower half is 0x0100 rather than the 0x0101 that T(0xff) would give.
// The argument of T is not parenthesized; it is only used with plain integer
// literals below.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32 fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03),   T(0x04), T(0x05), T(0x06),
    T(0x07),    T(0x08),    T(0x09), T(0x0a),   T(0x0b), T(0x0c), T(0x0d),
    T(0x0e),    T(0x0f),    T(0x10), T(0x11),   T(0x12), T(0x13), T(0x14),
    T(0x15),    T(0x16),    T(0x17), T(0x18),   T(0x19), T(0x1a), T(0x1b),
    T(0x1c),    T(0x1d),    T(0x1e), T(0x1f),   T(0x20), T(0x21), T(0x22),
    T(0x23),    T(0x24),    T(0x25), T(0x26),   T(0x27), T(0x28), T(0x29),
    T(0x2a),    T(0x2b),    T(0x2c), T(0x2d),   T(0x2e), T(0x2f), T(0x30),
    T(0x31),    T(0x32),    T(0x33), T(0x34),   T(0x35), T(0x36), T(0x37),
    T(0x38),    T(0x39),    T(0x3a), T(0x3b),   T(0x3c), T(0x3d), T(0x3e),
    T(0x3f),    T(0x40),    T(0x41), T(0x42),   T(0x43), T(0x44), T(0x45),
    T(0x46),    T(0x47),    T(0x48), T(0x49),   T(0x4a), T(0x4b), T(0x4c),
    T(0x4d),    T(0x4e),    T(0x4f), T(0x50),   T(0x51), T(0x52), T(0x53),
    T(0x54),    T(0x55),    T(0x56), T(0x57),   T(0x58), T(0x59), T(0x5a),
    T(0x5b),    T(0x5c),    T(0x5d), T(0x5e),   T(0x5f), T(0x60), T(0x61),
    T(0x62),    T(0x63),    T(0x64), T(0x65),   T(0x66), T(0x67), T(0x68),
    T(0x69),    T(0x6a),    T(0x6b), T(0x6c),   T(0x6d), T(0x6e), T(0x6f),
    T(0x70),    T(0x71),    T(0x72), T(0x73),   T(0x74), T(0x75), T(0x76),
    T(0x77),    T(0x78),    T(0x79), T(0x7a),   T(0x7b), T(0x7c), T(0x7d),
    T(0x7e),    T(0x7f),    T(0x80), T(0x81),   T(0x82), T(0x83), T(0x84),
    T(0x85),    T(0x86),    T(0x87), T(0x88),   T(0x89), T(0x8a), T(0x8b),
    T(0x8c),    T(0x8d),    T(0x8e), T(0x8f),   T(0x90), T(0x91), T(0x92),
    T(0x93),    T(0x94),    T(0x95), T(0x96),   T(0x97), T(0x98), T(0x99),
    T(0x9a),    T(0x9b),    T(0x9c), T(0x9d),   T(0x9e), T(0x9f), T(0xa0),
    T(0xa1),    T(0xa2),    T(0xa3), T(0xa4),   T(0xa5), T(0xa6), T(0xa7),
    T(0xa8),    T(0xa9),    T(0xaa), T(0xab),   T(0xac), T(0xad), T(0xae),
    T(0xaf),    T(0xb0),    T(0xb1), T(0xb2),   T(0xb3), T(0xb4), T(0xb5),
    T(0xb6),    T(0xb7),    T(0xb8), T(0xb9),   T(0xba), T(0xbb), T(0xbc),
    T(0xbd),    T(0xbe),    T(0xbf), T(0xc0),   T(0xc1), T(0xc2), T(0xc3),
    T(0xc4),    T(0xc5),    T(0xc6), T(0xc7),   T(0xc8), T(0xc9), T(0xca),
    T(0xcb),    T(0xcc),    T(0xcd), T(0xce),   T(0xcf), T(0xd0), T(0xd1),
    T(0xd2),    T(0xd3),    T(0xd4), T(0xd5),   T(0xd6), T(0xd7), T(0xd8),
    T(0xd9),    T(0xda),    T(0xdb), T(0xdc),   T(0xdd), T(0xde), T(0xdf),
    T(0xe0),    T(0xe1),    T(0xe2), T(0xe3),   T(0xe4), T(0xe5), T(0xe6),
    T(0xe7),    T(0xe8),    T(0xe9), T(0xea),   T(0xeb), T(0xec), T(0xed),
    T(0xee),    T(0xef),    T(0xf0), T(0xf1),   T(0xf2), T(0xf3), T(0xf4),
    T(0xf5),    T(0xf6),    T(0xf7), T(0xf8),   T(0xf9), T(0xfa), T(0xfb),
    T(0xfc),    T(0xfd),    T(0xfe), 0x01000100};
#undef T
2049 
// Divides B, G and R of each pixel by the pixel's alpha using the reciprocal
// in the low 16 bits of fixed_invtbl8[alpha] (8.8 fixed point), then clamps
// the result to 255. The alpha byte is copied unchanged.
void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint32 alpha = src_argb[3];
    const uint32 recip = fixed_invtbl8[alpha] & 0xffff;  // 8.8 fixed point
    // Compute all three channels before storing so in-place use is safe.
    const uint32 blue = (src_argb[0] * recip) >> 8;
    const uint32 green = (src_argb[1] * recip) >> 8;
    const uint32 red = (src_argb[2] * recip) >> 8;
    // Clamping should not be necessary but is free in assembly.
    dst_argb[0] = clamp255(blue);
    dst_argb[1] = clamp255(green);
    dst_argb[2] = clamp255(red);
    dst_argb[3] = alpha;
    src_argb += 4;
    dst_argb += 4;
  }
}
2070 
// Builds one row of a 4-channel cumulative sum.
// For every pixel and channel, the output is the running sum of that channel
// along this row (up to and including the pixel) plus the value at the same
// position in previous_cumsum.
void ComputeCumulativeSumRow_C(const uint8* row,
                               int32* cumsum,
                               const int32* previous_cumsum,
                               int width) {
  int32 running[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    int c;
    // Update all four running sums first, then emit the four outputs.
    for (c = 0; c < 4; ++c) {
      running[c] += row[c];
    }
    for (c = 0; c < 4; ++c) {
      cumsum[c] = running[c] + previous_cumsum[c];
    }
    row += 4;
    cumsum += 4;
    previous_cumsum += 4;
  }
}
2088 
// Converts cumulative sums into per-channel box averages.
// For each of count output pixels and each of 4 channels, the box sum
// bl[w + c] + tl[c] - bl[c] - tl[w + c] is scaled by 1 / area and truncated
// to a byte. tl and bl advance by one 4-channel entry per output pixel; w is
// the offset, in int32 elements, to the opposite box corner.
void CumulativeSumToAverageRow_C(const int32* tl,
                                 const int32* bl,
                                 int w,
                                 int area,
                                 uint8* dst,
                                 int count) {
  const float inv_area = 1.0f / area;
  int remaining;
  for (remaining = count; remaining > 0; --remaining) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8)((bl[w + c] + tl[c] - bl[c] - tl[w + c]) * inv_area);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
2107 
2108 // Copy pixels from rotated source to destination row with a slope.
2109 LIBYUV_API
ARGBAffineRow_C(const uint8 * src_argb,int src_argb_stride,uint8 * dst_argb,const float * uv_dudv,int width)2110 void ARGBAffineRow_C(const uint8* src_argb,
2111                      int src_argb_stride,
2112                      uint8* dst_argb,
2113                      const float* uv_dudv,
2114                      int width) {
2115   int i;
2116   // Render a row of pixels from source into a buffer.
2117   float uv[2];
2118   uv[0] = uv_dudv[0];
2119   uv[1] = uv_dudv[1];
2120   for (i = 0; i < width; ++i) {
2121     int x = (int)(uv[0]);
2122     int y = (int)(uv[1]);
2123     *(uint32*)(dst_argb) =
2124         *(const uint32*)(src_argb + y * src_argb_stride + x * 4);
2125     dst_argb += 4;
2126     uv[0] += uv_dudv[2];
2127     uv[1] += uv_dudv[3];
2128   }
2129 }
2130 
2131 // Blend 2 rows into 1.
HalfRow_C(const uint8 * src_uv,ptrdiff_t src_uv_stride,uint8 * dst_uv,int width)2132 static void HalfRow_C(const uint8* src_uv,
2133                       ptrdiff_t src_uv_stride,
2134                       uint8* dst_uv,
2135                       int width) {
2136   int x;
2137   for (x = 0; x < width; ++x) {
2138     dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
2139   }
2140 }
2141 
// Averages two rows of 16-bit samples with rounding: (top + bottom + 1) >> 1.
// The second row starts src_uv_stride elements after src_uv.
static void HalfRow_16_C(const uint16* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16* dst_uv,
                         int width) {
  int x = 0;
  while (x < width) {
    const int top = src_uv[x];
    const int bottom = src_uv[src_uv_stride + x];
    dst_uv[x] = (top + bottom + 1) >> 1;
    ++x;
  }
}
2151 
2152 // C version 2x2 -> 2x1.
InterpolateRow_C(uint8 * dst_ptr,const uint8 * src_ptr,ptrdiff_t src_stride,int width,int source_y_fraction)2153 void InterpolateRow_C(uint8* dst_ptr,
2154                       const uint8* src_ptr,
2155                       ptrdiff_t src_stride,
2156                       int width,
2157                       int source_y_fraction) {
2158   int y1_fraction = source_y_fraction;
2159   int y0_fraction = 256 - y1_fraction;
2160   const uint8* src_ptr1 = src_ptr + src_stride;
2161   int x;
2162   if (y1_fraction == 0) {
2163     memcpy(dst_ptr, src_ptr, width);
2164     return;
2165   }
2166   if (y1_fraction == 128) {
2167     HalfRow_C(src_ptr, src_stride, dst_ptr, width);
2168     return;
2169   }
2170   for (x = 0; x < width - 1; x += 2) {
2171     dst_ptr[0] =
2172         (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
2173     dst_ptr[1] =
2174         (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
2175     src_ptr += 2;
2176     src_ptr1 += 2;
2177     dst_ptr += 2;
2178   }
2179   if (width & 1) {
2180     dst_ptr[0] =
2181         (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
2182   }
2183 }
2184 
// 16-bit version of the 2x2 -> 2x1 row interpolator.
// Blends the row at src_ptr with the row src_stride elements later.
// source_y_fraction (out of 256) is the weight of the second row; the first
// row gets 256 - source_y_fraction. Fractions 0 and 128 take shortcuts: a
// plain copy of the first row and a rounded average, respectively.
// The general case truncates (no rounding term is added before the shift).
void InterpolateRow_16_C(uint16* dst_ptr,
                         const uint16* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint16* src_ptr1 = src_ptr + src_stride;
  int x;
  if (source_y_fraction == 0) {
    // Byte count is computed in size_t so width * 2 cannot overflow int.
    memcpy(dst_ptr, src_ptr, (size_t)width * 2);
    return;
  }
  if (source_y_fraction == 128) {
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    // Odd width: one leftover sample.
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
  }
}
2213 
2214 // Use first 4 shuffler values to reorder ARGB channels.
ARGBShuffleRow_C(const uint8 * src_argb,uint8 * dst_argb,const uint8 * shuffler,int width)2215 void ARGBShuffleRow_C(const uint8* src_argb,
2216                       uint8* dst_argb,
2217                       const uint8* shuffler,
2218                       int width) {
2219   int index0 = shuffler[0];
2220   int index1 = shuffler[1];
2221   int index2 = shuffler[2];
2222   int index3 = shuffler[3];
2223   // Shuffle a row of ARGB.
2224   int x;
2225   for (x = 0; x < width; ++x) {
2226     // To support in-place conversion.
2227     uint8 b = src_argb[index0];
2228     uint8 g = src_argb[index1];
2229     uint8 r = src_argb[index2];
2230     uint8 a = src_argb[index3];
2231     dst_argb[0] = b;
2232     dst_argb[1] = g;
2233     dst_argb[2] = r;
2234     dst_argb[3] = a;
2235     src_argb += 4;
2236     dst_argb += 4;
2237   }
2238 }
2239 
// Packs planar Y, U, V rows into 4-byte groups laid out as Y0, U, Y1, V.
// One U and one V byte are consumed per two Y bytes. For an odd width the
// final group is still written in full, with its second Y byte set to 0.
void I422ToYUY2Row_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[1];
    dst_frame[3] = src_v[0];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
    remaining -= 2;
  }
  // Odd width: pad the missing second Y with 0.
  if (remaining & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;
    dst_frame[3] = src_v[0];
  }
}
2263 
// Packs planar Y, U, V rows into 4-byte groups laid out as U, Y0, V, Y1.
// One U and one V byte are consumed per two Y bytes. For an odd width the
// final group is still written in full, with its second Y byte set to 0.
void I422ToUYVYRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
    remaining -= 2;
  }
  // Odd width: pad the missing second Y with 0.
  if (remaining & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;
  }
}
2287 
// Applies a cubic polynomial to every byte of every 4-byte pixel.
// For channel c (0..3) with input value v the result is
//   poly[c] + poly[c + 4] * v + poly[c + 8] * v^2 + poly[c + 12] * v^3,
// evaluated in float, truncated to int32 and clamped with Clamp().
void ARGBPolynomialRow_C(const uint8* src_argb,
                         uint8* dst_argb,
                         const float* poly,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    float in[4];
    float out[4];
    int c;
    // Load the whole pixel first so results do not depend on earlier stores.
    for (c = 0; c < 4; ++c) {
      in[c] = (float)(src_argb[c]);
    }
    for (c = 0; c < 4; ++c) {
      const float v = in[c];
      const float v2 = v * v;
      const float v3 = v2 * v;
      // Terms are accumulated in increasing order of degree.
      float acc = poly[c] + poly[c + 4] * v;
      acc += poly[c + 8] * v2;
      acc += poly[c + 12] * v3;
      out[c] = acc;
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = Clamp((int32)(out[c]));
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
2327 
2328 // Samples assumed to be unsigned in low 9, 10 or 12 bits.  Scale factor
2329 // adjust the source integer range to the half float range desired.
2330 
2331 // This magic constant is 2^-112. Multiplying by this
2332 // is the same as subtracting 112 from the exponent, which
2333 // is the difference in exponent bias between 32-bit and
2334 // 16-bit floats. Once we've done this subtraction, we can
2335 // simply extract the low bits of the exponent and the high
2336 // bits of the mantissa from our float and we're done.
2337 
HalfFloatRow_C(const uint16 * src,uint16 * dst,float scale,int width)2338 void HalfFloatRow_C(const uint16* src, uint16* dst, float scale, int width) {
2339   int i;
2340   float mult = 1.9259299444e-34f * scale;
2341   for (i = 0; i < width; ++i) {
2342     float value = src[i] * mult;
2343     dst[i] = (uint16)((*(uint32_t*)&value) >> 13);
2344   }
2345 }
2346 
// Remaps B, G and R of every pixel through a lookup table chosen by the
// pixel's weighted sum. lumacoeff packs three 8-bit weights (bits 0-7 for
// byte 0, bits 8-15 for byte 1, bits 16-23 for byte 2). The weighted sum,
// masked with 0x7F00, is the byte offset into luma of the 256-entry table
// used for that pixel. The alpha byte is copied unchanged.
void ARGBLumaColorTableRow_C(const uint8* src_argb,
                             uint8* dst_argb,
                             int width,
                             const uint8* luma,
                             uint32 lumacoeff) {
  const uint32 weight_b = lumacoeff & 0xff;
  const uint32 weight_g = (lumacoeff >> 8) & 0xff;
  const uint32 weight_r = (lumacoeff >> 16) & 0xff;
  const uint8* table;
  int remaining = width;

  // Two pixels per pass; each pixel selects its own table.
  while (remaining > 1) {
    table = luma + ((src_argb[0] * weight_b + src_argb[1] * weight_g +
                     src_argb[2] * weight_r) &
                    0x7F00u);
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
    table = luma + ((src_argb[4] * weight_b + src_argb[5] * weight_g +
                     src_argb[6] * weight_r) &
                    0x7F00u);
    dst_argb[4] = table[src_argb[4]];
    dst_argb[5] = table[src_argb[5]];
    dst_argb[6] = table[src_argb[6]];
    dst_argb[7] = src_argb[7];
    src_argb += 8;
    dst_argb += 8;
    remaining -= 2;
  }
  // Odd width: one leftover pixel.
  if (remaining & 1) {
    table = luma + ((src_argb[0] * weight_b + src_argb[1] * weight_g +
                     src_argb[2] * weight_r) &
                    0x7F00u);
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
  }
}
2388 
// Copies byte 3 (the alpha byte) of each 4-byte pixel from 'src' to 'dst' for
// 'width' pixels. The other three bytes of every dst pixel are left as is.
void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int remaining = width;
  // Main loop: two pixels (8 bytes) per pass.
  while (remaining > 1) {
    dst[3] = src[3];
    dst[7] = src[7];
    src += 8;
    dst += 8;
    remaining -= 2;
  }
  // Trailing pixel when width is odd.
  if ((width & 1) != 0) {
    dst[3] = src[3];
  }
}
2401 
// Gathers byte 3 (the alpha byte) of each 4-byte pixel in 'src_argb' into the
// packed byte array 'dst_a', one output byte per pixel, for 'width' pixels.
void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) {
  int remaining = width;
  // Main loop: two pixels in (8 bytes), two alpha bytes out per pass.
  while (remaining > 1) {
    dst_a[0] = src_argb[3];
    dst_a[1] = src_argb[7];
    src_argb += 8;
    dst_a += 2;
    remaining -= 2;
  }
  // Trailing pixel when width is odd.
  if ((width & 1) != 0) {
    dst_a[0] = src_argb[3];
  }
}
2414 
// Writes one byte from the packed array 'src' into byte 3 (the alpha byte) of
// each 4-byte pixel in 'dst', for 'width' pixels. The other three bytes of
// every dst pixel are left as is.
void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int remaining = width;
  // Main loop: two source bytes in, two pixels (8 bytes) out per pass.
  while (remaining > 1) {
    dst[3] = src[0];
    dst[7] = src[1];
    src += 2;
    dst += 8;
    remaining -= 2;
  }
  // Trailing pixel when width is odd.
  if ((width & 1) != 0) {
    dst[3] = src[0];
  }
}
2427 
2428 // Maximum temporary width for wrappers to process at a time, in pixels.
2429 #define MAXTWIDTH 2048
2430 
2431 #if !(defined(_MSC_VER) && defined(_M_IX86)) && \
2432     defined(HAS_I422TORGB565ROW_SSSE3)
2433 // row_win.cc has asm version, but GCC uses 2 step wrapper.
// Two-step wrapper: each pass runs I422ToARGBRow_SSSE3 into a temporary row
// of at most MAXTWIDTH pixels, then ARGBToRGB565Row_SSE2 from that row into
// dst_rgb565, until 'width' pixels have been processed.
void I422ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_u,
                           const uint8* src_v,
                           uint8* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, chunk);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
    // Y advances one byte per pixel, U and V one byte per two pixels, and
    // the destination two bytes per pixel.
    dst_rgb565 += chunk * 2;
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
  }
}
2452 #endif
2453 
2454 #if defined(HAS_I422TOARGB1555ROW_SSSE3)
// Two-step wrapper: each pass runs I422ToARGBRow_SSSE3 into a temporary row
// of at most MAXTWIDTH pixels, then ARGBToARGB1555Row_SSE2 from that row into
// dst_argb1555, until 'width' pixels have been processed.
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, chunk);
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, chunk);
    // Y advances one byte per pixel, U and V one byte per two pixels, and
    // the destination two bytes per pixel.
    dst_argb1555 += chunk * 2;
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
  }
}
2474 #endif
2475 
2476 #if defined(HAS_I422TOARGB4444ROW_SSSE3)
// Two-step wrapper: each pass runs I422ToARGBRow_SSSE3 into a temporary row
// of at most MAXTWIDTH pixels, then ARGBToARGB4444Row_SSE2 from that row into
// dst_argb4444, until 'width' pixels have been processed.
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, chunk);
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, chunk);
    // Y advances one byte per pixel, U and V one byte per two pixels, and
    // the destination two bytes per pixel.
    dst_argb4444 += chunk * 2;
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
  }
}
2496 #endif
2497 
2498 #if defined(HAS_NV12TORGB565ROW_SSSE3)
// Two-step wrapper: each pass runs NV12ToARGBRow_SSSE3 into a temporary row
// of at most MAXTWIDTH pixels, then ARGBToRGB565Row_SSE2 from that row into
// dst_rgb565, until 'width' pixels have been processed.
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_uv,
                           uint8* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, chunk);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
    // Y and the UV plane both advance one byte per pixel; the destination
    // advances two bytes per pixel.
    dst_rgb565 += chunk * 2;
    src_y += chunk;
    src_uv += chunk;
  }
}
2516 #endif
2517 
2518 #if defined(HAS_I422TORGB565ROW_AVX2)
// Two-step wrapper: each pass runs I422ToARGBRow_AVX2 into a temporary row of
// at most MAXTWIDTH pixels, then packs that row into dst_rgb565 with
// ARGBToRGB565Row_AVX2 when HAS_ARGBTORGB565ROW_AVX2 is defined, otherwise
// with ARGBToRGB565Row_SSE2.
void I422ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          uint8* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
#endif
    // Y advances one byte per pixel, U and V one byte per two pixels, and
    // the destination two bytes per pixel.
    dst_rgb565 += chunk * 2;
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
  }
}
2541 #endif
2542 
2543 #if defined(HAS_I422TOARGB1555ROW_AVX2)
// Two-step wrapper: each pass runs I422ToARGBRow_AVX2 into a temporary row of
// at most MAXTWIDTH pixels, then packs that row into dst_argb1555 with
// ARGBToARGB1555Row_AVX2 when HAS_ARGBTOARGB1555ROW_AVX2 is defined,
// otherwise with ARGBToARGB1555Row_SSE2.
void I422ToARGB1555Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, chunk);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, chunk);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, chunk);
#endif
    // Y advances one byte per pixel, U and V one byte per two pixels, and
    // the destination two bytes per pixel.
    dst_argb1555 += chunk * 2;
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
  }
}
2567 #endif
2568 
2569 #if defined(HAS_I422TOARGB4444ROW_AVX2)
// Two-step wrapper: each pass runs I422ToARGBRow_AVX2 into a temporary row of
// at most MAXTWIDTH pixels, then packs that row into dst_argb4444 with
// ARGBToARGB4444Row_AVX2 when HAS_ARGBTOARGB4444ROW_AVX2 is defined,
// otherwise with ARGBToARGB4444Row_SSE2.
void I422ToARGB4444Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, chunk);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, chunk);
#else
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, chunk);
#endif
    // Y advances one byte per pixel, U and V one byte per two pixels, and
    // the destination two bytes per pixel.
    dst_argb4444 += chunk * 2;
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
  }
}
2593 #endif
2594 
2595 #if defined(HAS_I422TORGB24ROW_AVX2)
// Two-step wrapper: each pass runs I422ToARGBRow_AVX2 into a temporary row of
// at most MAXTWIDTH pixels, then ARGBToRGB24Row_SSSE3 from that row into
// dst_rgb24, until 'width' pixels have been processed.
void I422ToRGB24Row_AVX2(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, chunk);
    // TODO(fbarchard): ARGBToRGB24Row_AVX2
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, chunk);
    // Y advances one byte per pixel, U and V one byte per two pixels, and
    // the destination three bytes per pixel.
    dst_rgb24 += chunk * 3;
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
  }
}
2616 #endif
2617 
2618 #if defined(HAS_NV12TORGB565ROW_AVX2)
// Two-step wrapper: each pass runs NV12ToARGBRow_AVX2 into a temporary row of
// at most MAXTWIDTH pixels, then packs that row into dst_rgb565 with
// ARGBToRGB565Row_AVX2 when HAS_ARGBTORGB565ROW_AVX2 is defined, otherwise
// with ARGBToRGB565Row_SSE2.
void NV12ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_uv,
                          uint8* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
#endif
    // Y and the UV plane both advance one byte per pixel; the destination
    // advances two bytes per pixel.
    dst_rgb565 += chunk * 2;
    src_y += chunk;
    src_uv += chunk;
  }
}
2640 #endif
2641 
2642 #ifdef __cplusplus
2643 }  // extern "C"
2644 }  // namespace libyuv
2645 #endif
2646