1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <string.h>  // For memcpy and memset.
14 
15 #include "libyuv/basic_types.h"
16 
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21 
22 // llvm x86 is poor at ternary operator, so use branchless min/max.
23 
24 #define USE_BRANCHLESS 1
25 #if USE_BRANCHLESS
clamp0(int32 v)26 static __inline int32 clamp0(int32 v) {
27   return ((-(v) >> 31) & (v));
28 }
29 
clamp255(int32 v)30 static __inline int32 clamp255(int32 v) {
31   return (((255 - (v)) >> 31) | (v)) & 255;
32 }
33 
Clamp(int32 val)34 static __inline uint32 Clamp(int32 val) {
35   int v = clamp0(val);
36   return (uint32)(clamp255(v));
37 }
38 
Abs(int32 v)39 static __inline uint32 Abs(int32 v) {
40   int m = v >> 31;
41   return (v + m) ^ m;
42 }
43 #else  // USE_BRANCHLESS
44 static __inline int32 clamp0(int32 v) {
45   return (v < 0) ? 0 : v;
46 }
47 
48 static __inline int32 clamp255(int32 v) {
49   return (v > 255) ? 255 : v;
50 }
51 
52 static __inline uint32 Clamp(int32 val) {
53   int v = clamp0(val);
54   return (uint32)(clamp255(v));
55 }
56 
57 static __inline uint32 Abs(int32 v) {
58   return (v < 0) ? -v : v;
59 }
60 #endif  // USE_BRANCHLESS
61 
62 #ifdef LIBYUV_LITTLE_ENDIAN
63 #define WRITEWORD(p, v) *(uint32*)(p) = v
64 #else
WRITEWORD(uint8 * p,uint32 v)65 static inline void WRITEWORD(uint8* p, uint32 v) {
66   p[0] = (uint8)(v & 255);
67   p[1] = (uint8)((v >> 8) & 255);
68   p[2] = (uint8)((v >> 16) & 255);
69   p[3] = (uint8)((v >> 24) & 255);
70 }
71 #endif
72 
// Converts width pixels of RGB24 (3 bytes per pixel, read as B, G, R) to ARGB
// (4 bytes per pixel, written as B, G, R, A) with alpha set to 255.
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_rgb24[0];
    const uint8 green = src_rgb24[1];
    const uint8 red = src_rgb24[2];
    src_rgb24 += 3;
    *dst_argb++ = blue;
    *dst_argb++ = green;
    *dst_argb++ = red;
    *dst_argb++ = 255u;
  }
}
87 
// Converts width pixels of RAW (3 bytes per pixel, read as R, G, B) to ARGB
// (4 bytes per pixel, written as B, G, R, A) with alpha set to 255.
void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 red = src_raw[0];
    const uint8 green = src_raw[1];
    const uint8 blue = src_raw[2];
    src_raw += 3;
    *dst_argb++ = blue;
    *dst_argb++ = green;
    *dst_argb++ = red;
    *dst_argb++ = 255u;
  }
}
102 
// Converts width pixels of RGB565 (2 bytes per pixel, low byte first) to ARGB
// with alpha set to 255.  Each channel is widened to 8 bits by repeating its
// top bits in the vacated low bits.
void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Assemble the 16 bit pixel: byte 0 is the low half, byte 1 the high half.
    const unsigned int pixel =
        src_rgb565[0] | ((unsigned int)src_rgb565[1] << 8);
    const unsigned int blue = pixel & 0x1f;          // bits 0..4
    const unsigned int green = (pixel >> 5) & 0x3f;  // bits 5..10
    const unsigned int red = pixel >> 11;            // bits 11..15
    dst_argb[0] = (uint8)((blue << 3) | (blue >> 2));
    dst_argb[1] = (uint8)((green << 2) | (green >> 4));
    dst_argb[2] = (uint8)((red << 3) | (red >> 2));
    dst_argb[3] = 255u;
    src_rgb565 += 2;
    dst_argb += 4;
  }
}
117 
// Converts width pixels of ARGB1555 (2 bytes per pixel, low byte first) to
// ARGB.  The 5 bit colour channels are widened to 8 bits by repeating their
// top bits; the single alpha bit becomes 0 or 255.
void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Assemble the 16 bit pixel: byte 0 is the low half, byte 1 the high half.
    const unsigned int pixel =
        src_argb1555[0] | ((unsigned int)src_argb1555[1] << 8);
    const unsigned int blue = pixel & 0x1f;           // bits 0..4
    const unsigned int green = (pixel >> 5) & 0x1f;   // bits 5..9
    const unsigned int red = (pixel >> 10) & 0x1f;    // bits 10..14
    const unsigned int alpha = pixel >> 15;           // bit 15
    dst_argb[0] = (uint8)((blue << 3) | (blue >> 2));
    dst_argb[1] = (uint8)((green << 3) | (green >> 2));
    dst_argb[2] = (uint8)((red << 3) | (red >> 2));
    dst_argb[3] = (uint8)(alpha ? 255u : 0u);
    src_argb1555 += 2;
    dst_argb += 4;
  }
}
134 
// Converts width pixels of ARGB4444 (2 bytes per pixel) to ARGB.  Each 4 bit
// channel n becomes the byte (n << 4) | n, which is the same as n * 0x11.
void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 low = src_argb4444[0];   // G in the high nibble, B in the low.
    const uint8 high = src_argb4444[1];  // A in the high nibble, R in the low.
    dst_argb[0] = (uint8)((low & 0x0f) * 0x11);
    dst_argb[1] = (uint8)((low >> 4) * 0x11);
    dst_argb[2] = (uint8)((high & 0x0f) * 0x11);
    dst_argb[3] = (uint8)((high >> 4) * 0x11);
    src_argb4444 += 2;
    dst_argb += 4;
  }
}
151 
// Converts width pixels of ARGB (bytes B, G, R, A) to RGB24 (bytes B, G, R)
// by dropping the alpha byte.
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    src_argb += 4;
    *dst_rgb++ = blue;
    *dst_rgb++ = green;
    *dst_rgb++ = red;
  }
}
165 
// Converts width pixels of ARGB (bytes B, G, R, A) to RAW (bytes R, G, B):
// the alpha byte is dropped and the colour order is reversed.
void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    src_argb += 4;
    *dst_rgb++ = red;
    *dst_rgb++ = green;
    *dst_rgb++ = blue;
  }
}
179 
// Converts width pixels of ARGB to RGB565 by keeping the top 5/6/5 bits of
// B/G/R.  Alpha is ignored.
// Every pixel is packed in unsigned arithmetic and stored as two bytes, low
// byte first.  Compared with packing two pixels into an int and storing a
// word, this keeps the shifts out of the sign bit, gives the same byte order
// for the last pixel of an odd width as for all the others, and needs no
// particular alignment of dst_rgb.
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32 b = src_argb[0] >> 3;
    const uint32 g = src_argb[1] >> 2;
    const uint32 r = src_argb[2] >> 3;
    const uint32 pixel = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8)(pixel & 0xff);
    dst_rgb[1] = (uint8)(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
201 
202 // dither4 is a row of 4 values from 4x4 dither matrix.
203 // The 4x4 matrix contains values to increase RGB.  When converting to
204 // fewer bits (565) this provides an ordered dither.
205 // The order in the 4x4 matrix in first byte is upper left.
206 // The 4 values are passed as an int, then referenced as an array, so
207 // endian will not affect order of the original matrix.  But the dither4
208 // will containing the first pixel in the lower byte for little endian
209 // or the upper byte for big endian.
ARGBToRGB565DitherRow_C(const uint8 * src_argb,uint8 * dst_rgb,const uint32 dither4,int width)210 void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
211                              const uint32 dither4, int width) {
212   int x;
213   for (x = 0; x < width - 1; x += 2) {
214     int dither0 = ((const unsigned char*)(&dither4))[x & 3];
215     int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
216     uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
217     uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
218     uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
219     uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
220     uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
221     uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
222     WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
223               (b1 << 16) | (g1 << 21) | (r1 << 27));
224     dst_rgb += 4;
225     src_argb += 8;
226   }
227   if (width & 1) {
228     int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
229     uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
230     uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
231     uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
232     *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
233   }
234 }
235 
// Converts width pixels of ARGB to ARGB1555 by keeping the top 5 bits of B, G
// and R and the top bit of A.
// Every pixel is packed in unsigned arithmetic and stored as two bytes, low
// byte first.  This keeps the shifts out of the sign bit (the two-pixel form
// shifted the second alpha bit to bit 31 of an int), makes the output
// independent of the CPU byte order, and needs no particular alignment of
// dst_rgb.
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32 b = src_argb[0] >> 3;
    const uint32 g = src_argb[1] >> 3;
    const uint32 r = src_argb[2] >> 3;
    const uint32 a = src_argb[3] >> 7;
    const uint32 pixel = b | (g << 5) | (r << 10) | (a << 15);
    dst_rgb[0] = (uint8)(pixel & 0xff);
    dst_rgb[1] = (uint8)(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
262 
// Converts width pixels of ARGB to ARGB4444 by keeping the top 4 bits of each
// of B, G, R and A.
// Every pixel is packed in unsigned arithmetic and stored as two bytes, low
// byte first.  This keeps the shifts out of the sign bit (the two-pixel form
// shifted the second alpha nibble to bits 28..31 of an int), makes the output
// independent of the CPU byte order, and needs no particular alignment of
// dst_rgb.
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32 b = src_argb[0] >> 4;
    const uint32 g = src_argb[1] >> 4;
    const uint32 r = src_argb[2] >> 4;
    const uint32 a = src_argb[3] >> 4;
    const uint32 pixel = b | (g << 4) | (r << 8) | (a << 12);
    dst_rgb[0] = (uint8)(pixel & 0xff);
    dst_rgb[1] = (uint8)(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
289 
// Luma from 8 bit R, G, B using 8 bit fixed point weights (66, 129, 25).
// 0x1080 is 16 * 256 + 128: an offset of 16 plus one half for rounding,
// applied before the final shift by 8.
static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return (weighted + 0x1080) >> 8;
}
293 
// U chroma from 8 bit R, G, B using 8 bit fixed point weights.
// 0x8080 is 128 * 256 + 128: the centre value 128 plus one half for rounding,
// which also keeps the sum non-negative before the shift.
static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * b - 74 * g - 38 * r;
  return (weighted + 0x8080) >> 8;
}
// V chroma from 8 bit R, G, B using 8 bit fixed point weights.
// 0x8080 is 128 * 256 + 128: the centre value 128 plus one half for rounding,
// which also keeps the sum non-negative before the shift.
static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return (weighted + 0x8080) >> 8;
}
300 
301 #define MAKEROWY(NAME, R, G, B, BPP) \
302 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
303   int x;                                                                       \
304   for (x = 0; x < width; ++x) {                                                \
305     dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
306     src_argb0 += BPP;                                                          \
307     dst_y += 1;                                                                \
308   }                                                                            \
309 }                                                                              \
310 void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
311                        uint8* dst_u, uint8* dst_v, int width) {                \
312   const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
313   int x;                                                                       \
314   for (x = 0; x < width - 1; x += 2) {                                         \
315     uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] +                              \
316                src_rgb1[B] + src_rgb1[B + BPP]) >> 2;                          \
317     uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] +                              \
318                src_rgb1[G] + src_rgb1[G + BPP]) >> 2;                          \
319     uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] +                              \
320                src_rgb1[R] + src_rgb1[R + BPP]) >> 2;                          \
321     dst_u[0] = RGBToU(ar, ag, ab);                                             \
322     dst_v[0] = RGBToV(ar, ag, ab);                                             \
323     src_rgb0 += BPP * 2;                                                       \
324     src_rgb1 += BPP * 2;                                                       \
325     dst_u += 1;                                                                \
326     dst_v += 1;                                                                \
327   }                                                                            \
328   if (width & 1) {                                                             \
329     uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1;                               \
330     uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1;                               \
331     uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1;                               \
332     dst_u[0] = RGBToU(ar, ag, ab);                                             \
333     dst_v[0] = RGBToV(ar, ag, ab);                                             \
334   }                                                                            \
335 }
336 
337 MAKEROWY(ARGB, 2, 1, 0, 4)
338 MAKEROWY(BGRA, 1, 2, 3, 4)
339 MAKEROWY(ABGR, 0, 1, 2, 4)
340 MAKEROWY(RGBA, 3, 2, 1, 4)
341 MAKEROWY(RGB24, 2, 1, 0, 3)
342 MAKEROWY(RAW, 0, 1, 2, 3)
343 #undef MAKEROWY
344 
345 // JPeg uses a variation on BT.601-1 full range
346 // y =  0.29900 * r + 0.58700 * g + 0.11400 * b
347 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
348 // v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
349 // BT.601 Mpeg range uses:
350 // b 0.1016 * 255 = 25.908 = 25
351 // g 0.5078 * 255 = 129.489 = 129
352 // r 0.2578 * 255 = 65.739 = 66
353 // JPeg 8 bit Y (not used):
354 // b 0.11400 * 256 = 29.184 = 29
355 // g 0.58700 * 256 = 150.272 = 150
356 // r 0.29900 * 256 = 76.544 = 77
357 // JPeg 7 bit Y:
358 // b 0.11400 * 128 = 14.592 = 15
359 // g 0.58700 * 128 = 75.136 = 75
360 // r 0.29900 * 128 = 38.272 = 38
361 // JPeg 8 bit U:
362 // b  0.50000 * 255 = 127.5 = 127
363 // g -0.33126 * 255 = -84.4713 = -84
364 // r -0.16874 * 255 = -43.0287 = -43
365 // JPeg 8 bit V:
366 // b -0.08131 * 255 = -20.73405 = -20
367 // g -0.41869 * 255 = -106.76595 = -107
368 // r  0.50000 * 255 = 127.5 = 127
369 
// JPeg luma from 8 bit R, G, B using 7 bit fixed point weights (38, 75, 15).
// 64 is one half in 7 bit fixed point, added for rounding before the shift.
static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 38 * r + 75 * g + 15 * b;
  return (weighted + 64) >> 7;
}
373 
// JPeg U chroma from 8 bit R, G, B using 8 bit fixed point weights.
// 0x8080 is 128 * 256 + 128: the centre value 128 plus one half for rounding,
// which also keeps the sum non-negative before the shift.
static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
// JPeg V chroma from 8 bit R, G, B using 8 bit fixed point weights.
// 0x8080 is 128 * 256 + 128: the centre value 128 plus one half for rounding,
// which also keeps the sum non-negative before the shift.
static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}
380 
381 #define AVGB(a, b) (((a) + (b) + 1) >> 1)
382 
383 #define MAKEROWYJ(NAME, R, G, B, BPP) \
384 void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {      \
385   int x;                                                                       \
386   for (x = 0; x < width; ++x) {                                                \
387     dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);              \
388     src_argb0 += BPP;                                                          \
389     dst_y += 1;                                                                \
390   }                                                                            \
391 }                                                                              \
392 void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,             \
393                         uint8* dst_u, uint8* dst_v, int width) {               \
394   const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
395   int x;                                                                       \
396   for (x = 0; x < width - 1; x += 2) {                                         \
397     uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                            \
398                     AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));               \
399     uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                            \
400                     AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));               \
401     uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                            \
402                     AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));               \
403     dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
404     dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
405     src_rgb0 += BPP * 2;                                                       \
406     src_rgb1 += BPP * 2;                                                       \
407     dst_u += 1;                                                                \
408     dst_v += 1;                                                                \
409   }                                                                            \
410   if (width & 1) {                                                             \
411     uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]);                                 \
412     uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]);                                 \
413     uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]);                                 \
414     dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
415     dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
416   }                                                                            \
417 }
418 
419 MAKEROWYJ(ARGB, 2, 1, 0, 4)
420 #undef MAKEROWYJ
421 
// Writes one JPeg U and one JPeg V value for every horizontal pair of ARGB
// pixels in a single row.  The pair is averaged (truncating) per channel; for
// an odd width the last pixel is converted on its own.
void ARGBToUVJ422Row_C(const uint8* src_argb,
                       uint8* dst_u, uint8* dst_v, int width) {
  int pairs;
  for (pairs = width / 2; pairs > 0; --pairs) {
    const uint8 avg_b = (uint8)((src_argb[0] + src_argb[4]) >> 1);
    const uint8 avg_g = (uint8)((src_argb[1] + src_argb[5]) >> 1);
    const uint8 avg_r = (uint8)((src_argb[2] + src_argb[6]) >> 1);
    *dst_u++ = RGBToUJ(avg_r, avg_g, avg_b);
    *dst_v++ = RGBToVJ(avg_r, avg_g, avg_b);
    src_argb += 8;
  }
  if (width & 1) {
    const uint8 last_b = src_argb[0];
    const uint8 last_g = src_argb[1];
    const uint8 last_r = src_argb[2];
    dst_u[0] = RGBToUJ(last_r, last_g, last_b);
    dst_v[0] = RGBToVJ(last_r, last_g, last_b);
  }
}
443 
// Writes one Y value per RGB565 pixel.  Each channel is first widened to 8
// bits by repeating its top bits, then converted with RGBToY.
void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Assemble the 16 bit pixel: byte 0 is the low half, byte 1 the high half.
    const unsigned int pixel =
        src_rgb565[0] | ((unsigned int)src_rgb565[1] << 8);
    const unsigned int blue5 = pixel & 0x1f;
    const unsigned int green6 = (pixel >> 5) & 0x3f;
    const unsigned int red5 = pixel >> 11;
    const uint8 blue = (uint8)((blue5 << 3) | (blue5 >> 2));
    const uint8 green = (uint8)((green6 << 2) | (green6 >> 4));
    const uint8 red = (uint8)((red5 << 3) | (red5 >> 2));
    *dst_y++ = RGBToY(red, green, blue);
    src_rgb565 += 2;
  }
}
458 
// Writes one Y value per ARGB1555 pixel.  Each 5 bit colour channel is first
// widened to 8 bits by repeating its top bits; the alpha bit is ignored.
void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Assemble the 16 bit pixel: byte 0 is the low half, byte 1 the high half.
    const unsigned int pixel =
        src_argb1555[0] | ((unsigned int)src_argb1555[1] << 8);
    const unsigned int blue5 = pixel & 0x1f;
    const unsigned int green5 = (pixel >> 5) & 0x1f;
    const unsigned int red5 = (pixel >> 10) & 0x1f;
    const uint8 blue = (uint8)((blue5 << 3) | (blue5 >> 2));
    const uint8 green = (uint8)((green5 << 3) | (green5 >> 2));
    const uint8 red = (uint8)((red5 << 3) | (red5 >> 2));
    *dst_y++ = RGBToY(red, green, blue);
    src_argb1555 += 2;
  }
}
473 
// Writes one Y value per ARGB4444 pixel.  Each 4 bit colour channel n is first
// widened to the byte (n << 4) | n, which equals n * 0x11; alpha is ignored.
void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 low = src_argb4444[0];   // G in the high nibble, B in the low.
    const uint8 high = src_argb4444[1];  // R in the low nibble.
    const uint8 blue = (uint8)((low & 0x0f) * 0x11);
    const uint8 green = (uint8)((low >> 4) * 0x11);
    const uint8 red = (uint8)((high & 0x0f) * 0x11);
    *dst_y++ = RGBToY(red, green, blue);
    src_argb4444 += 2;
  }
}
488 
// Writes one U and one V value per 2x2 block of RGB565 pixels, taken from the
// row at src_rgb565 and the row src_stride_rgb565 bytes after it.
// The channels of the block are summed rather than averaged, which yields
// wider values that are then topped up to 8 bits by repeating their high bits.
// For an odd width the last column is summed over its two rows only.
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
                     uint8* dst_u, uint8* dst_v, int width) {
  const uint8* row0 = src_rgb565;
  const uint8* row1 = src_rgb565 + src_stride_rgb565;
  int pairs;
  for (pairs = width / 2; pairs > 0; --pairs) {
    // The four 16 bit pixels of the block, assembled low byte first.
    const int p0 = row0[0] | (row0[1] << 8);
    const int p1 = row0[2] | (row0[3] << 8);
    const int p2 = row1[0] | (row1[1] << 8);
    const int p3 = row1[2] | (row1[3] << 8);
    // 565 * 4 = 787: B and R sums need 7 bits, the G sum needs 8.
    const int sum_b = (p0 & 0x1f) + (p1 & 0x1f) + (p2 & 0x1f) + (p3 & 0x1f);
    const int sum_g = ((p0 >> 5) & 0x3f) + ((p1 >> 5) & 0x3f) +
                      ((p2 >> 5) & 0x3f) + ((p3 >> 5) & 0x3f);
    const int sum_r = (p0 >> 11) + (p1 >> 11) + (p2 >> 11) + (p3 >> 11);
    // 787 -> 888.
    const uint8 b = (uint8)((sum_b << 1) | (sum_b >> 6));
    const uint8 g = (uint8)sum_g;
    const uint8 r = (uint8)((sum_r << 1) | (sum_r >> 6));
    *dst_u++ = RGBToU(r, g, b);
    *dst_v++ = RGBToV(r, g, b);
    row0 += 4;
    row1 += 4;
  }
  if (width & 1) {
    const int p0 = row0[0] | (row0[1] << 8);
    const int p2 = row1[0] | (row1[1] << 8);
    // 565 * 2 = 676: B and R sums need 6 bits, the G sum needs 7.
    const int sum_b = (p0 & 0x1f) + (p2 & 0x1f);
    const int sum_g = ((p0 >> 5) & 0x3f) + ((p2 >> 5) & 0x3f);
    const int sum_r = (p0 >> 11) + (p2 >> 11);
    // 676 -> 888.
    const uint8 b = (uint8)((sum_b << 2) | (sum_b >> 4));
    const uint8 g = (uint8)((sum_g << 1) | (sum_g >> 6));
    const uint8 r = (uint8)((sum_r << 2) | (sum_r >> 4));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
535 
// Writes one U and one V value per 2x2 block of ARGB1555 pixels, taken from
// the row at src_argb1555 and the row src_stride_argb1555 bytes after it.
// The 5 bit channels of the block are summed rather than averaged, which
// yields wider values that are then topped up to 8 bits by repeating their
// high bits.  The alpha bit is ignored.
// For an odd width the last column is summed over its two rows only.  Red of
// the second row is taken from bits 10..14 like every other red value; it was
// previously extracted with the RGB565 layout (byte 1 >> 3), which mixed the
// alpha bit into red and dropped red's lowest bit.
void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // The four 16 bit pixels of the block, assembled low byte first.
    const int p0 = src_argb1555[0] | (src_argb1555[1] << 8);
    const int p1 = src_argb1555[2] | (src_argb1555[3] << 8);
    const int p2 = next_argb1555[0] | (next_argb1555[1] << 8);
    const int p3 = next_argb1555[2] | (next_argb1555[3] << 8);
    // 555 * 4 = 777: every sum needs 7 bits.
    const int sum_b = (p0 & 0x1f) + (p1 & 0x1f) + (p2 & 0x1f) + (p3 & 0x1f);
    const int sum_g = ((p0 >> 5) & 0x1f) + ((p1 >> 5) & 0x1f) +
                      ((p2 >> 5) & 0x1f) + ((p3 >> 5) & 0x1f);
    const int sum_r = ((p0 >> 10) & 0x1f) + ((p1 >> 10) & 0x1f) +
                      ((p2 >> 10) & 0x1f) + ((p3 >> 10) & 0x1f);
    // 777 -> 888.
    const uint8 b = (uint8)((sum_b << 1) | (sum_b >> 6));
    const uint8 g = (uint8)((sum_g << 1) | (sum_g >> 6));
    const uint8 r = (uint8)((sum_r << 1) | (sum_r >> 6));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    const int p0 = src_argb1555[0] | (src_argb1555[1] << 8);
    const int p2 = next_argb1555[0] | (next_argb1555[1] << 8);
    // 555 * 2 = 666: every sum needs 6 bits.
    const int sum_b = (p0 & 0x1f) + (p2 & 0x1f);
    const int sum_g = ((p0 >> 5) & 0x1f) + ((p2 >> 5) & 0x1f);
    const int sum_r = ((p0 >> 10) & 0x1f) + ((p2 >> 10) & 0x1f);
    // 666 -> 888.
    const uint8 b = (uint8)((sum_b << 2) | (sum_b >> 4));
    const uint8 g = (uint8)((sum_g << 2) | (sum_g >> 4));
    const uint8 r = (uint8)((sum_r << 2) | (sum_r >> 4));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
583 
// Writes one U and one V value per 2x2 block of ARGB4444 pixels, taken from
// the row at src_argb4444 and the row src_stride_argb4444 bytes after it.
// The 4 bit channels of the block are summed rather than averaged, which
// yields wider values that are then topped up to 8 bits by repeating their
// high bits.  Alpha is ignored.
// For an odd width the last column is summed over its two rows only.
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* row0 = src_argb4444;
  const uint8* row1 = src_argb4444 + src_stride_argb4444;
  int pairs;
  for (pairs = width / 2; pairs > 0; --pairs) {
    // Byte 0 of a pixel holds G (high nibble) and B (low nibble); the low
    // nibble of byte 1 holds R.  444 * 4 = 666: every sum needs 6 bits.
    const int sum_b = (row0[0] & 0x0f) + (row0[2] & 0x0f) +
                      (row1[0] & 0x0f) + (row1[2] & 0x0f);
    const int sum_g = (row0[0] >> 4) + (row0[2] >> 4) +
                      (row1[0] >> 4) + (row1[2] >> 4);
    const int sum_r = (row0[1] & 0x0f) + (row0[3] & 0x0f) +
                      (row1[1] & 0x0f) + (row1[3] & 0x0f);
    // 666 -> 888.
    const uint8 b = (uint8)((sum_b << 2) | (sum_b >> 4));
    const uint8 g = (uint8)((sum_g << 2) | (sum_g >> 4));
    const uint8 r = (uint8)((sum_r << 2) | (sum_r >> 4));
    *dst_u++ = RGBToU(r, g, b);
    *dst_v++ = RGBToV(r, g, b);
    row0 += 4;
    row1 += 4;
  }
  if (width & 1) {
    // 444 * 2 = 555: every sum needs 5 bits.
    const int sum_b = (row0[0] & 0x0f) + (row1[0] & 0x0f);
    const int sum_g = (row0[0] >> 4) + (row1[0] >> 4);
    const int sum_r = (row0[1] & 0x0f) + (row1[1] & 0x0f);
    // 555 -> 888.
    const uint8 b = (uint8)((sum_b << 3) | (sum_b >> 2));
    const uint8 g = (uint8)((sum_g << 3) | (sum_g >> 2));
    const uint8 r = (uint8)((sum_r << 3) | (sum_r >> 2));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
631 
// Writes one U and one V value for every ARGB pixel of a row (no subsampling).
void ARGBToUV444Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    *dst_u++ = RGBToU(red, green, blue);
    *dst_v++ = RGBToV(red, green, blue);
    src_argb += 4;
  }
}
646 
// Writes one U and one V value for every horizontal pair of ARGB pixels in a
// single row.  The pair is averaged (truncating) per channel; for an odd width
// the last pixel is converted on its own.
void ARGBToUV422Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int pairs;
  for (pairs = width / 2; pairs > 0; --pairs) {
    const uint8 avg_b = (uint8)((src_argb[0] + src_argb[4]) >> 1);
    const uint8 avg_g = (uint8)((src_argb[1] + src_argb[5]) >> 1);
    const uint8 avg_r = (uint8)((src_argb[2] + src_argb[6]) >> 1);
    *dst_u++ = RGBToU(avg_r, avg_g, avg_b);
    *dst_v++ = RGBToV(avg_r, avg_g, avg_b);
    src_argb += 8;
  }
  if (width & 1) {
    const uint8 last_b = src_argb[0];
    const uint8 last_g = src_argb[1];
    const uint8 last_r = src_argb[2];
    dst_u[0] = RGBToU(last_r, last_g, last_b);
    dst_v[0] = RGBToV(last_r, last_g, last_b);
  }
}
668 
// Writes one U and one V value for every group of four horizontally adjacent
// ARGB pixels in a single row.  A full group is averaged (truncating) per
// channel.  The 1 to 3 pixels left over when width is not a multiple of 4
// produce one more U/V pair from the average of just those pixels.
void ARGBToUV411Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int quads;
  for (quads = width / 4; quads > 0; --quads) {
    const uint8 avg_b =
        (uint8)((src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2);
    const uint8 avg_g =
        (uint8)((src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2);
    const uint8 avg_r =
        (uint8)((src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2);
    *dst_u++ = RGBToU(avg_r, avg_g, avg_b);
    *dst_v++ = RGBToV(avg_r, avg_g, avg_b);
    src_argb += 16;
  }
  switch (width & 3) {
    case 3: {
      const uint8 avg_b = (uint8)((src_argb[0] + src_argb[4] + src_argb[8]) / 3);
      const uint8 avg_g = (uint8)((src_argb[1] + src_argb[5] + src_argb[9]) / 3);
      const uint8 avg_r =
          (uint8)((src_argb[2] + src_argb[6] + src_argb[10]) / 3);
      dst_u[0] = RGBToU(avg_r, avg_g, avg_b);
      dst_v[0] = RGBToV(avg_r, avg_g, avg_b);
      break;
    }
    case 2: {
      const uint8 avg_b = (uint8)((src_argb[0] + src_argb[4]) >> 1);
      const uint8 avg_g = (uint8)((src_argb[1] + src_argb[5]) >> 1);
      const uint8 avg_r = (uint8)((src_argb[2] + src_argb[6]) >> 1);
      dst_u[0] = RGBToU(avg_r, avg_g, avg_b);
      dst_v[0] = RGBToV(avg_r, avg_g, avg_b);
      break;
    }
    case 1: {
      const uint8 last_b = src_argb[0];
      const uint8 last_g = src_argb[1];
      const uint8 last_r = src_argb[2];
      dst_u[0] = RGBToU(last_r, last_g, last_b);
      dst_v[0] = RGBToV(last_r, last_g, last_b);
      break;
    }
    default:
      break;  // Width is a multiple of 4: nothing left over.
  }
}
702 
// Converts a row of ARGB to gray: B, G and R of each output pixel are all set
// to the JPeg luma of the input pixel and alpha is copied through.
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 luma = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
713 
714 // Convert a row of image to Sepia tone.
ARGBSepiaRow_C(uint8 * dst_argb,int width)715 void ARGBSepiaRow_C(uint8* dst_argb, int width) {
716   int x;
717   for (x = 0; x < width; ++x) {
718     int b = dst_argb[0];
719     int g = dst_argb[1];
720     int r = dst_argb[2];
721     int sb = (b * 17 + g * 68 + r * 35) >> 7;
722     int sg = (b * 22 + g * 88 + r * 45) >> 7;
723     int sr = (b * 24 + g * 98 + r * 50) >> 7;
724     // b does not over flow. a is preserved from original.
725     dst_argb[0] = sb;
726     dst_argb[1] = clamp255(sg);
727     dst_argb[2] = clamp255(sr);
728     dst_argb += 4;
729   }
730 }
731 
732 // Apply color matrix to a row of image. Matrix is signed.
733 // TODO(fbarchard): Consider adding rounding (+32).
ARGBColorMatrixRow_C(const uint8 * src_argb,uint8 * dst_argb,const int8 * matrix_argb,int width)734 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
735                           const int8* matrix_argb, int width) {
736   int x;
737   for (x = 0; x < width; ++x) {
738     int b = src_argb[0];
739     int g = src_argb[1];
740     int r = src_argb[2];
741     int a = src_argb[3];
742     int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
743               r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
744     int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
745               r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
746     int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
747               r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
748     int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
749               r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
750     dst_argb[0] = Clamp(sb);
751     dst_argb[1] = Clamp(sg);
752     dst_argb[2] = Clamp(sr);
753     dst_argb[3] = Clamp(sa);
754     src_argb += 4;
755     dst_argb += 4;
756   }
757 }
758 
759 // Apply color table to a row of image.
// Remaps all four channels of each ARGB pixel in place through table_argb.
// The table is interleaved by channel: the replacement for value n of
// channel c (0 = B, 1 = G, 2 = R, 3 = A) is table_argb[n * 4 + c].
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  uint8* pixel = dst_argb;
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Capture the old channel values before any of them is overwritten.
    const int old_b = pixel[0];
    const int old_g = pixel[1];
    const int old_r = pixel[2];
    const int old_a = pixel[3];
    pixel[0] = table_argb[old_b * 4 + 0];
    pixel[1] = table_argb[old_g * 4 + 1];
    pixel[2] = table_argb[old_r * 4 + 2];
    pixel[3] = table_argb[old_a * 4 + 3];
    pixel += 4;
  }
}
774 
775 // Apply color table to a row of image.
// Remaps the B, G and R channels of each ARGB pixel in place through
// table_argb; the alpha byte is not touched. The table uses the same
// interleaved layout as a 4-channel table: entry n of channel c
// (0 = B, 1 = G, 2 = R) lives at table_argb[n * 4 + c].
void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int done = 0;
  while (done < width) {
    // Capture the old channel values before any of them is overwritten.
    const int old_b = dst_argb[0];
    const int old_g = dst_argb[1];
    const int old_r = dst_argb[2];
    dst_argb[0] = table_argb[old_b * 4 + 0];
    dst_argb[1] = table_argb[old_g * 4 + 1];
    dst_argb[2] = table_argb[old_r * 4 + 2];
    dst_argb += 4;
    ++done;
  }
}
788 
// Quantizes the B, G and R channels of each ARGB pixel in place; the alpha
// byte is not touched. Each channel value v is replaced by
//   (v * scale >> 16) * interval_size + interval_offset
// and truncated to 8 bits when stored.
void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
                       int interval_offset, int width) {
  int x;
  int ch;
  for (x = 0; x < width; ++x) {
    // Channels are independent, so each one is read and rewritten in turn.
    for (ch = 0; ch < 3; ++ch) {
      const int v = dst_argb[ch];
      dst_argb[ch] = (v * scale >> 16) * interval_size + interval_offset;
    }
    dst_argb += 4;
  }
}
802 
803 #define REPEAT8(v) (v) | ((v) << 8)
804 #define SHADE(f, v) v * f >> 24
805 
ARGBShadeRow_C(const uint8 * src_argb,uint8 * dst_argb,int width,uint32 value)806 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
807                     uint32 value) {
808   const uint32 b_scale = REPEAT8(value & 0xff);
809   const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
810   const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
811   const uint32 a_scale = REPEAT8(value >> 24);
812 
813   int i;
814   for (i = 0; i < width; ++i) {
815     const uint32 b = REPEAT8(src_argb[0]);
816     const uint32 g = REPEAT8(src_argb[1]);
817     const uint32 r = REPEAT8(src_argb[2]);
818     const uint32 a = REPEAT8(src_argb[3]);
819     dst_argb[0] = SHADE(b, b_scale);
820     dst_argb[1] = SHADE(g, g_scale);
821     dst_argb[2] = SHADE(r, r_scale);
822     dst_argb[3] = SHADE(a, a_scale);
823     src_argb += 4;
824     dst_argb += 4;
825   }
826 }
827 #undef REPEAT8
828 #undef SHADE
829 
830 #define REPEAT8(v) (v) | ((v) << 8)
831 #define SHADE(f, v) v * f >> 16
832 
ARGBMultiplyRow_C(const uint8 * src_argb0,const uint8 * src_argb1,uint8 * dst_argb,int width)833 void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
834                        uint8* dst_argb, int width) {
835   int i;
836   for (i = 0; i < width; ++i) {
837     const uint32 b = REPEAT8(src_argb0[0]);
838     const uint32 g = REPEAT8(src_argb0[1]);
839     const uint32 r = REPEAT8(src_argb0[2]);
840     const uint32 a = REPEAT8(src_argb0[3]);
841     const uint32 b_scale = src_argb1[0];
842     const uint32 g_scale = src_argb1[1];
843     const uint32 r_scale = src_argb1[2];
844     const uint32 a_scale = src_argb1[3];
845     dst_argb[0] = SHADE(b, b_scale);
846     dst_argb[1] = SHADE(g, g_scale);
847     dst_argb[2] = SHADE(r, r_scale);
848     dst_argb[3] = SHADE(a, a_scale);
849     src_argb0 += 4;
850     src_argb1 += 4;
851     dst_argb += 4;
852   }
853 }
854 #undef REPEAT8
855 #undef SHADE
856 
857 #define SHADE(f, v) clamp255(v + f)
858 
ARGBAddRow_C(const uint8 * src_argb0,const uint8 * src_argb1,uint8 * dst_argb,int width)859 void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
860                   uint8* dst_argb, int width) {
861   int i;
862   for (i = 0; i < width; ++i) {
863     const int b = src_argb0[0];
864     const int g = src_argb0[1];
865     const int r = src_argb0[2];
866     const int a = src_argb0[3];
867     const int b_add = src_argb1[0];
868     const int g_add = src_argb1[1];
869     const int r_add = src_argb1[2];
870     const int a_add = src_argb1[3];
871     dst_argb[0] = SHADE(b, b_add);
872     dst_argb[1] = SHADE(g, g_add);
873     dst_argb[2] = SHADE(r, r_add);
874     dst_argb[3] = SHADE(a, a_add);
875     src_argb0 += 4;
876     src_argb1 += 4;
877     dst_argb += 4;
878   }
879 }
880 #undef SHADE
881 
882 #define SHADE(f, v) clamp0(f - v)
883 
ARGBSubtractRow_C(const uint8 * src_argb0,const uint8 * src_argb1,uint8 * dst_argb,int width)884 void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
885                        uint8* dst_argb, int width) {
886   int i;
887   for (i = 0; i < width; ++i) {
888     const int b = src_argb0[0];
889     const int g = src_argb0[1];
890     const int r = src_argb0[2];
891     const int a = src_argb0[3];
892     const int b_sub = src_argb1[0];
893     const int g_sub = src_argb1[1];
894     const int r_sub = src_argb1[2];
895     const int a_sub = src_argb1[3];
896     dst_argb[0] = SHADE(b, b_sub);
897     dst_argb[1] = SHADE(g, g_sub);
898     dst_argb[2] = SHADE(r, r_sub);
899     dst_argb[3] = SHADE(a, a_sub);
900     src_argb0 += 4;
901     src_argb1 += 4;
902     dst_argb += 4;
903   }
904 }
905 #undef SHADE
906 
// Sobel functions which mimic SSSE3.
// Horizontal Sobel response for one output row.
// For every column i the sample at i is compared with the sample at i + 2 in
// each of the three source rows; the differences are weighted 1, 2, 1
// (src_y1 carries the weight 2), summed, and the absolute value is
// saturated to 255. Reads two samples past column width - 1 in each row.
void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
                 uint8* dst_sobelx, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int row0_diff = src_y0[i] - src_y0[i + 2];
    const int row1_diff = src_y1[i] - src_y1[i + 2];
    const int row2_diff = src_y2[i] - src_y2[i + 2];
    const int magnitude = Abs(row0_diff + row1_diff * 2 + row2_diff);
    dst_sobelx[i] = (uint8)(clamp255(magnitude));
  }
}
925 
// Vertical Sobel response for one output row.
// For every column i the samples at i, i + 1 and i + 2 of src_y0 are compared
// with the same columns of src_y1; the differences are weighted 1, 2, 1
// (the middle column carries the weight 2), summed, and the absolute value is
// saturated to 255. Reads two samples past column width - 1 in each row.
void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
                 uint8* dst_sobely, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int left_diff = src_y0[i + 0] - src_y1[i + 0];
    const int mid_diff = src_y0[i + 1] - src_y1[i + 1];
    const int right_diff = src_y0[i + 2] - src_y1[i + 2];
    const int magnitude = Abs(left_diff + mid_diff * 2 + right_diff);
    dst_sobely[i] = (uint8)(clamp255(magnitude));
  }
}
943 
// Combines the X and Y Sobel planes into gray ARGB: B, G and R all receive
// the saturated sum sobelx + sobely, and alpha is set to 255.
void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i, dst_argb += 4) {
    const int gx = src_sobelx[i];
    const int gy = src_sobely[i];
    const uint8 gray = (uint8)(clamp255(gx + gy));
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = (uint8)(255u);
  }
}
958 
// Combines the X and Y Sobel planes into a single 8 bit plane: each output
// sample is the sum sobelx + sobely saturated to 255.
void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                       uint8* dst_y, int width) {
  int i = 0;
  while (i < width) {
    const int gx = src_sobelx[i];
    const int gy = src_sobely[i];
    dst_y[i] = (uint8)(clamp255(gx + gy));
    ++i;
  }
}
969 
// Packs the Sobel planes into ARGB with one plane per color channel:
// B = sobely, G = saturated (sobelx + sobely), R = sobelx, A = 255.
void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                  uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i, dst_argb += 4) {
    const int gx = src_sobelx[i];
    const int gy = src_sobely[i];
    const int sum = clamp255(gx + gy);
    dst_argb[0] = (uint8)(gy);
    dst_argb[1] = (uint8)(sum);
    dst_argb[2] = (uint8)(gx);
    dst_argb[3] = (uint8)(255u);
  }
}
984 
// Expands a row of 8 bit gray to ARGB without any scaling: the Y value is
// copied to B, G and R, and alpha is set to 255.
void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x, ++src_y, dst_argb += 4) {
    const uint8 gray = *src_y;
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = 255u;
  }
}
996 
997 // BT.601 YUV to RGB reference
998 //  R = (Y - 16) * 1.164              - V * -1.596
999 //  G = (Y - 16) * 1.164 - U *  0.391 - V *  0.813
1000 //  B = (Y - 16) * 1.164 - U * -2.018
1001 
1002 // Y contribution to R,G,B.  Scale and bias.
1003 // TODO(fbarchard): Consider moving constants into a common header.
1004 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1005 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1006 
1007 // U and V contributions to R,G,B.
1008 #define UB -128 /* max(-128, round(-2.018 * 64)) */
1009 #define UG 25 /* round(0.391 * 64) */
1010 #define VG 52 /* round(0.813 * 64) */
1011 #define VR -102 /* round(-1.596 * 64) */
1012 
1013 // Bias values to subtract 16 from Y and 128 from U and V.
1014 #define BB (UB * 128 + YGB)
1015 #define BG (UG * 128 + VG * 128 + YGB)
1016 #define BR (VR * 128 + YGB)
1017 
1018 // C reference code that mimics the YUV assembly.
YuvPixel(uint8 y,uint8 u,uint8 v,uint8 * b,uint8 * g,uint8 * r)1019 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
1020                               uint8* b, uint8* g, uint8* r) {
1021   uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
1022   *b = Clamp((int32)(-(u * UB) + y1 + BB) >> 6);
1023   *g = Clamp((int32)(-(v * VG + u * UG) + y1 + BG) >> 6);
1024   *r = Clamp((int32)(-(v * VR)+ y1 + BR) >> 6);
1025 }
1026 
1027 // C reference code that mimics the YUV assembly.
YPixel(uint8 y,uint8 * b,uint8 * g,uint8 * r)1028 static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
1029   uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
1030   *b = Clamp((int32)(y1 + YGB) >> 6);
1031   *g = Clamp((int32)(y1 + YGB) >> 6);
1032   *r = Clamp((int32)(y1 + YGB) >> 6);
1033 }
1034 
1035 #undef YG
1036 #undef YGB
1037 #undef UB
1038 #undef UG
1039 #undef VG
1040 #undef VR
1041 #undef BB
1042 #undef BG
1043 #undef BR
1044 
1045 // JPEG YUV to RGB reference
1046 // *  R = Y                - V * -1.40200
1047 // *  G = Y - U *  0.34414 - V *  0.71414
1048 // *  B = Y - U * -1.77200
1049 
1050 // Y contribution to R,G,B.  Scale and bias.
1051 // TODO(fbarchard): Consider moving constants into a common header.
1052 #define YGJ 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
1053 #define YGBJ 32  /* 64 / 2 */
1054 
1055 // U and V contributions to R,G,B.
1056 #define UBJ -113 /* round(-1.77200 * 64) */
1057 #define UGJ 22 /* round(0.34414 * 64) */
1058 #define VGJ 46 /* round(0.71414  * 64) */
1059 #define VRJ -90 /* round(-1.40200 * 64) */
1060 
1061 // Bias values to subtract 16 from Y and 128 from U and V.
1062 #define BBJ (UBJ * 128 + YGBJ)
1063 #define BGJ (UGJ * 128 + VGJ * 128 + YGBJ)
1064 #define BRJ (VRJ * 128 + YGBJ)
1065 
1066 // C reference code that mimics the YUV assembly.
YuvJPixel(uint8 y,uint8 u,uint8 v,uint8 * b,uint8 * g,uint8 * r)1067 static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v,
1068                                uint8* b, uint8* g, uint8* r) {
1069   uint32 y1 = (uint32)(y * 0x0101 * YGJ) >> 16;
1070   *b = Clamp((int32)(-(u * UBJ) + y1 + BBJ) >> 6);
1071   *g = Clamp((int32)(-(v * VGJ + u * UGJ) + y1 + BGJ) >> 6);
1072   *r = Clamp((int32)(-(v * VRJ) + y1 + BRJ) >> 6);
1073 }
1074 
1075 #undef YGJ
1076 #undef YGBJ
1077 #undef UBJ
1078 #undef UGJ
1079 #undef VGJ
1080 #undef VRJ
1081 #undef BBJ
1082 #undef BGJ
1083 #undef BRJ
1084 
1085 #if !defined(LIBYUV_DISABLE_NEON) && \
1086     (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
1087 // C mimic assembly.
1088 // TODO(fbarchard): Remove subsampling from Neon.
I444ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,int width)1089 void I444ToARGBRow_C(const uint8* src_y,
1090                      const uint8* src_u,
1091                      const uint8* src_v,
1092                      uint8* rgb_buf,
1093                      int width) {
1094   int x;
1095   for (x = 0; x < width - 1; x += 2) {
1096     uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
1097     uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
1098     YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1099     rgb_buf[3] = 255;
1100     YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1101     rgb_buf[7] = 255;
1102     src_y += 2;
1103     src_u += 2;
1104     src_v += 2;
1105     rgb_buf += 8;  // Advance 2 pixels.
1106   }
1107   if (width & 1) {
1108     YuvPixel(src_y[0], src_u[0], src_v[0],
1109              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1110   }
1111 }
1112 #else
I444ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,int width)1113 void I444ToARGBRow_C(const uint8* src_y,
1114                      const uint8* src_u,
1115                      const uint8* src_v,
1116                      uint8* rgb_buf,
1117                      int width) {
1118   int x;
1119   for (x = 0; x < width; ++x) {
1120     YuvPixel(src_y[0], src_u[0], src_v[0],
1121              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1122     rgb_buf[3] = 255;
1123     src_y += 1;
1124     src_u += 1;
1125     src_v += 1;
1126     rgb_buf += 4;  // Advance 1 pixel.
1127   }
1128 }
1129 #endif
1130 
1131 // Also used for 420
// Converts a row of planar YUV with half-width chroma to ARGB (bytes in
// B, G, R, A order, alpha 255). Each U/V sample is shared by two
// neighbouring Y samples.
void I422ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int x;
  // Full pairs of pixels.
  for (x = 0; x + 1 < width; x += 2) {
    const uint8 u = *src_u++;
    const uint8 v = *src_v++;
    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Trailing pixel of an odd width, with its own chroma sample.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1156 
// Converts a row of planar YUV with half-width chroma to ARGB (bytes in
// B, G, R, A order, alpha 255) using YuvJPixel, i.e. the JPEG coefficients.
// Each U/V sample is shared by two neighbouring Y samples.
void J422ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int remaining;
  // Full pairs of pixels.
  for (remaining = width; remaining > 1; remaining -= 2) {
    const uint8 u = *src_u++;
    const uint8 v = *src_v++;
    YuvJPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvJPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Trailing pixel of an odd width, with its own chroma sample.
  if (width & 1) {
    YuvJPixel(src_y[0], src_u[0], src_v[0],
              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1181 
// Converts a row of planar YUV with half-width chroma to 3 bytes per pixel
// in B, G, R order. Each U/V sample is shared by two neighbouring Y samples.
void I422ToRGB24Row_C(const uint8* src_y,
                      const uint8* src_u,
                      const uint8* src_v,
                      uint8* rgb_buf,
                      int width) {
  int remaining = width;
  // Full pairs of pixels.
  while (remaining > 1) {
    const uint8 u = *src_u++;
    const uint8 v = *src_v++;
    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    YuvPixel(src_y[1], u, v, rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
    src_y += 2;
    rgb_buf += 6;  // Advance 2 pixels.
    remaining -= 2;
  }
  // Trailing pixel of an odd width, with its own chroma sample.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  }
}
1203 
// Converts a row of planar YUV with half-width chroma to 3 bytes per pixel
// in R, G, B order. Each U/V sample is shared by two neighbouring Y samples.
void I422ToRAWRow_C(const uint8* src_y,
                    const uint8* src_u,
                    const uint8* src_v,
                    uint8* rgb_buf,
                    int width) {
  int x;
  // Full pairs of pixels.
  for (x = 0; x + 1 < width; x += 2) {
    const uint8 u = *src_u++;
    const uint8 v = *src_v++;
    // YuvPixel takes its outputs as (b, g, r); RAW stores them reversed.
    YuvPixel(src_y[0], u, v, rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    YuvPixel(src_y[1], u, v, rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
    src_y += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  // Trailing pixel of an odd width, with its own chroma sample.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
  }
}
1225 
// Converts a row of planar YUV with half-width chroma to ARGB4444.
// Each 16 bit pixel holds B in bits 0-3, G in bits 4-7, R in bits 8-11 and
// an opaque alpha (0xf) in bits 12-15. Pixels are stored one byte at a time,
// low byte first, so the destination needs no particular alignment and the
// layout does not depend on host byte order (on little endian hosts this
// matches a native 16 bit store).
void I422ToARGB4444Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_argb4444,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
    // Keep the top 4 bits of every channel.
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    dst_argb4444[0] = (uint8)(b0 | (g0 << 4));
    dst_argb4444[1] = (uint8)(r0 | 0xf0);
    dst_argb4444[2] = (uint8)(b1 | (g1 << 4));
    dst_argb4444[3] = (uint8)(r1 | 0xf0);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    dst_argb4444[0] = (uint8)(b0 | (g0 << 4));
    dst_argb4444[1] = (uint8)(r0 | 0xf0);
  }
}
1263 
// Converts a row of planar YUV with half-width chroma to ARGB1555.
// Each 16 bit pixel holds B in bits 0-4, G in bits 5-9, R in bits 10-14 and
// an opaque alpha bit in bit 15. Pixels are stored one byte at a time, low
// byte first, so the destination needs no particular alignment and the
// layout does not depend on host byte order (on little endian hosts this
// matches a native 16 bit store).
void I422ToARGB1555Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_argb1555,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  uint32 pixel0;
  uint32 pixel1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
    // Keep the top 5 bits of every channel.
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    pixel0 = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
    pixel1 = b1 | (g1 << 5) | (r1 << 10) | 0x8000;
    dst_argb1555[0] = (uint8)(pixel0 & 0xff);
    dst_argb1555[1] = (uint8)(pixel0 >> 8);
    dst_argb1555[2] = (uint8)(pixel1 & 0xff);
    dst_argb1555[3] = (uint8)(pixel1 >> 8);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    pixel0 = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
    dst_argb1555[0] = (uint8)(pixel0 & 0xff);
    dst_argb1555[1] = (uint8)(pixel0 >> 8);
  }
}
1301 
// Converts a row of planar YUV with half-width chroma to RGB565.
// Each 16 bit pixel holds B in bits 0-4, G in bits 5-10 and R in bits 11-15.
// Every pixel is packed on its own (no shift ever reaches the sign bit of
// int) and stored one byte at a time, low byte first, so the destination
// needs no particular alignment and the layout does not depend on host byte
// order (on little endian hosts this matches a native 16 bit store).
void I422ToRGB565Row_C(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  uint32 pixel0;
  uint32 pixel1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
    // Keep the top 5, 6 and 5 bits of B, G and R.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    pixel0 = b0 | (g0 << 5) | (r0 << 11);
    pixel1 = b1 | (g1 << 5) | (r1 << 11);
    dst_rgb565[0] = (uint8)(pixel0 & 0xff);
    dst_rgb565[1] = (uint8)(pixel0 >> 8);
    dst_rgb565[2] = (uint8)(pixel1 & 0xff);
    dst_rgb565[3] = (uint8)(pixel1 >> 8);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    pixel0 = b0 | (g0 << 5) | (r0 << 11);
    dst_rgb565[0] = (uint8)(pixel0 & 0xff);
    dst_rgb565[1] = (uint8)(pixel0 >> 8);
  }
}
1338 
// Converts a row of planar YUV with quarter-width chroma to ARGB (bytes in
// B, G, R, A order, alpha 255). Each U/V sample is shared by four
// neighbouring Y samples; a final group of 1 to 3 pixels shares one sample.
void I411ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int x;
  int k;
  // Full groups of four pixels.
  for (x = 0; x + 3 < width; x += 4) {
    for (k = 0; k < 4; ++k) {
      uint8* out = rgb_buf + k * 4;
      YuvPixel(src_y[k], src_u[0], src_v[0], out + 0, out + 1, out + 2);
      out[3] = 255;
    }
    src_y += 4;
    src_u += 1;
    src_v += 1;
    rgb_buf += 16;  // Advance 4 pixels.
  }
  // Remaining two pixels, if bit 1 of width is set.
  if (width & 2) {
    for (k = 0; k < 2; ++k) {
      uint8* out = rgb_buf + k * 4;
      YuvPixel(src_y[k], src_u[0], src_v[0], out + 0, out + 1, out + 2);
      out[3] = 255;
    }
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Remaining single pixel, if width is odd.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1379 
// Converts a row of NV12 (Y plane plus interleaved U,V byte pairs) to ARGB
// (bytes in B, G, R, A order, alpha 255). Each U,V pair is shared by two
// neighbouring Y samples.
void NV12ToARGBRow_C(const uint8* src_y,
                     const uint8* src_uv,
                     uint8* rgb_buf,
                     int width) {
  int remaining;
  // Full pairs of pixels.
  for (remaining = width; remaining > 1; remaining -= 2) {
    const uint8 u = src_uv[0];
    const uint8 v = src_uv[1];
    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Trailing pixel of an odd width, with its own U,V pair.
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1402 
// Converts a row of NV21 (Y plane plus interleaved V,U byte pairs) to ARGB
// (bytes in B, G, R, A order, alpha 255). Each V,U pair is shared by two
// neighbouring Y samples.
void NV21ToARGBRow_C(const uint8* src_y,
                     const uint8* src_vu,
                     uint8* rgb_buf,
                     int width) {
  int x;
  // Full pairs of pixels.
  for (x = 0; x + 1 < width; x += 2) {
    // V comes first in memory, U second.
    const uint8 v = src_vu[0];
    const uint8 u = src_vu[1];
    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Trailing pixel of an odd width, with its own V,U pair.
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1427 
// Converts a row of NV12 (Y plane plus interleaved U,V byte pairs) to RGB565.
// Each 16 bit pixel holds B in bits 0-4, G in bits 5-10 and R in bits 11-15.
// Every pixel is packed on its own (no shift ever reaches the sign bit of
// int) and stored one byte at a time, low byte first, so the destination
// needs no particular alignment and the layout does not depend on host byte
// order (on little endian hosts this matches a native 16 bit store).
void NV12ToRGB565Row_C(const uint8* src_y,
                       const uint8* src_uv,
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  uint32 pixel0;
  uint32 pixel1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1);
    // Keep the top 5, 6 and 5 bits of B, G and R.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    pixel0 = b0 | (g0 << 5) | (r0 << 11);
    pixel1 = b1 | (g1 << 5) | (r1 << 11);
    dst_rgb565[0] = (uint8)(pixel0 & 0xff);
    dst_rgb565[1] = (uint8)(pixel0 >> 8);
    dst_rgb565[2] = (uint8)(pixel1 & 0xff);
    dst_rgb565[3] = (uint8)(pixel1 >> 8);
    src_y += 2;
    src_uv += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    pixel0 = b0 | (g0 << 5) | (r0 << 11);
    dst_rgb565[0] = (uint8)(pixel0 & 0xff);
    dst_rgb565[1] = (uint8)(pixel0 >> 8);
  }
}
1462 
// Converts a row of NV21 (Y plane plus interleaved V,U byte pairs, passed as
// vsrc_u) to RGB565. Each 16 bit pixel holds B in bits 0-4, G in bits 5-10
// and R in bits 11-15. Every pixel is packed on its own (no shift ever
// reaches the sign bit of int) and stored one byte at a time, low byte
// first, so the destination needs no particular alignment and the layout
// does not depend on host byte order (on little endian hosts this matches a
// native 16 bit store).
void NV21ToRGB565Row_C(const uint8* src_y,
                       const uint8* vsrc_u,
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  uint32 pixel0;
  uint32 pixel1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // V is the first byte of each pair, U the second.
    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
    // Keep the top 5, 6 and 5 bits of B, G and R.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    pixel0 = b0 | (g0 << 5) | (r0 << 11);
    pixel1 = b1 | (g1 << 5) | (r1 << 11);
    dst_rgb565[0] = (uint8)(pixel0 & 0xff);
    dst_rgb565[1] = (uint8)(pixel0 >> 8);
    dst_rgb565[2] = (uint8)(pixel1 & 0xff);
    dst_rgb565[3] = (uint8)(pixel1 >> 8);
    src_y += 2;
    vsrc_u += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    pixel0 = b0 | (g0 << 5) | (r0 << 11);
    dst_rgb565[0] = (uint8)(pixel0 & 0xff);
    dst_rgb565[1] = (uint8)(pixel0 >> 8);
  }
}
1497 
// Converts a row of packed YUY2 (4 byte groups Y0, U, Y1, V covering two
// pixels) to ARGB (bytes in B, G, R, A order, alpha 255).
void YUY2ToARGBRow_C(const uint8* src_yuy2,
                     uint8* rgb_buf,
                     int width) {
  int remaining;
  // Full groups of two pixels.
  for (remaining = width; remaining > 1; remaining -= 2) {
    const uint8 u = src_yuy2[1];
    const uint8 v = src_yuy2[3];
    YuvPixel(src_yuy2[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Trailing pixel of an odd width; V is still read from byte 3 of the group.
  if (width & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1518 
// Converts a row of packed UYVY (4 byte groups U, Y0, V, Y1 covering two
// pixels) to ARGB (bytes in B, G, R, A order, alpha 255).
void UYVYToARGBRow_C(const uint8* src_uyvy,
                     uint8* rgb_buf,
                     int width) {
  int x;
  // Full groups of two pixels.
  for (x = 0; x + 1 < width; x += 2) {
    const uint8 u = src_uyvy[0];
    const uint8 v = src_uyvy[2];
    YuvPixel(src_uyvy[1], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Trailing pixel of an odd width uses the first Y of its group.
  if (width & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1539 
// Converts a row of planar YUV with half-width chroma to 4 bytes per pixel
// stored as A, R, G, B (alpha 255 in byte 0, blue in byte 3). Each U/V
// sample is shared by two neighbouring Y samples.
void I422ToBGRARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int remaining = width;
  // Full pairs of pixels.
  while (remaining > 1) {
    const uint8 u = *src_u++;
    const uint8 v = *src_v++;
    YuvPixel(src_y[0], u, v, rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], u, v, rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
    rgb_buf[4] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  // Trailing pixel of an odd width, with its own chroma sample.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
    rgb_buf[0] = 255;
  }
}
1564 
// Converts a row of planar YUV with half-width chroma to 4 bytes per pixel
// stored as R, G, B, A (red in byte 0, alpha 255 in byte 3). Each U/V
// sample is shared by two neighbouring Y samples.
void I422ToABGRRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int x;
  // Full pairs of pixels.
  for (x = 0; x + 1 < width; x += 2) {
    const uint8 u = *src_u++;
    const uint8 v = *src_v++;
    YuvPixel(src_y[0], u, v, rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], u, v, rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Trailing pixel of an odd width, with its own chroma sample.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    rgb_buf[3] = 255;
  }
}
1589 
// Converts a row of planar YUV with half-width chroma to 4 bytes per pixel
// stored as A, B, G, R (alpha 255 in byte 0, red in byte 3). Each U/V
// sample is shared by two neighbouring Y samples.
void I422ToRGBARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int remaining;
  // Full pairs of pixels.
  for (remaining = width; remaining > 1; remaining -= 2) {
    const uint8 u = *src_u++;
    const uint8 v = *src_v++;
    YuvPixel(src_y[0], u, v, rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], u, v, rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
    rgb_buf[4] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Trailing pixel of an odd width, with its own chroma sample.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
    rgb_buf[0] = 255;
  }
}
1614 
// Expands a row of luma to gray ARGB through YPixel (which scales and biases
// Y); B, G and R receive the same value and alpha is set to 255.
void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
  int remaining = width;
  // Two pixels per iteration.
  while (remaining > 1) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  // Trailing pixel of an odd width.
  if (width & 1) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1630 
// Copies a row of bytes in reverse order: dst[i] = src[width - 1 - i].
void MirrorRow_C(const uint8* src, uint8* dst, int width) {
  const uint8* from = src + width - 1;  // Last source byte.
  uint8* to = dst;
  int remaining;
  // Two bytes per iteration, walking the source backwards.
  for (remaining = width; remaining > 1; remaining -= 2) {
    to[0] = from[0];
    to[1] = from[-1];
    to += 2;
    from -= 2;
  }
  // Odd width: the last output byte is the first source byte.
  if (width & 1) {
    dst[width - 1] = from[0];
  }
}
1643 
// Reverses a row of interleaved U,V byte pairs and splits it into separate
// planes: dst_u[i] and dst_v[i] receive the U and V of pair width - 1 - i.
void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  const uint8* from = src_uv + (width - 1) * 2;  // Last U,V pair.
  uint8* u_out = dst_u;
  uint8* v_out = dst_v;
  int remaining;
  // Two pairs per iteration, walking the source backwards.
  for (remaining = width; remaining > 1; remaining -= 2) {
    u_out[0] = from[0];
    u_out[1] = from[-2];
    v_out[0] = from[1];
    v_out[1] = from[-1];
    u_out += 2;
    v_out += 2;
    from -= 4;
  }
  // Odd width: the last outputs come from the first source pair.
  if (width & 1) {
    dst_u[width - 1] = from[0];
    dst_v[width - 1] = from[1];
  }
}
1659 
// Reverse a row of 4-byte pixels: output pixel i is input pixel
// width - 1 - i, with the bytes inside each pixel left in order.
// Pixels are moved with memcpy through a local uint32, so neither buffer has
// to be 4-byte aligned and no uint8 storage is accessed through a uint32
// lvalue. A width <= 0 writes nothing.
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32 pixel;
    memcpy(&pixel, src + (size_t)(width - 1 - x) * 4, sizeof(pixel));
    memcpy(dst + (size_t)x * 4, &pixel, sizeof(pixel));
  }
}
1674 
// De-interleave a row of U,V byte pairs into separate U and V rows.
// |width| is the number of pairs.
void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  int x = 0;
  while (x + 1 < width) {
    // Two pairs per iteration.
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[3];
    src_uv += 4;
    x += 2;
  }
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}
1689 
// Interleave separate U and V rows into a single row of U,V byte pairs.
// |width| is the number of pairs written.
void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                  int width) {
  int x = 0;
  while (x + 1 < width) {
    // Two pairs per iteration.
    dst_uv[0] = src_u[x];
    dst_uv[1] = src_v[x];
    dst_uv[2] = src_u[x + 1];
    dst_uv[3] = src_v[x + 1];
    dst_uv += 4;
    x += 2;
  }
  if (width & 1) {
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}
1705 
// Copy |count| bytes from src to dst (plain memcpy; regions must not overlap).
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  const size_t num_bytes = count;  // Same int -> size_t conversion memcpy does.
  memcpy(dst, src, num_bytes);
}
1709 
// Copy |count| 16-bit samples from src to dst (regions must not overlap).
// The byte count is computed in size_t so that it cannot overflow signed int
// arithmetic for large counts.
void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
  memcpy(dst, src, (size_t)count * sizeof(*src));
}
1713 
// Fill |width| bytes at dst with the value v8.
void SetRow_C(uint8* dst, uint8 v8, int width) {
  const size_t num_bytes = width;  // Same int -> size_t conversion memset does.
  memset(dst, v8, num_bytes);
}
1717 
// Fill |width| 4-byte pixels at dst_argb with v32, stored in native byte
// order. Each pixel is written with memcpy so dst_argb does not have to be
// 4-byte aligned and the uint8 buffer is never accessed through a uint32
// lvalue.
void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    memcpy(dst_argb + (size_t)x * sizeof(v32), &v32, sizeof(v32));
  }
}
1725 
// Produce one row of U and one row of V from two rows of YUY2 (422 -> 420).
// The second row starts src_stride_yuy2 bytes after the first; vertically
// adjacent chroma samples are averaged with rounding.
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
                   uint8* dst_u, uint8* dst_v, int width) {
  const uint8* row0 = src_yuy2;
  const uint8* row1 = src_yuy2 + src_stride_yuy2;
  int x = 0;
  // Each 4-byte group Y0 U Y1 V covers two pixels and yields one U and one V.
  while (x < width) {
    *dst_u++ = (uint8)((row0[1] + row1[1] + 1) >> 1);
    *dst_v++ = (uint8)((row0[3] + row1[3] + 1) >> 1);
    row0 += 4;
    row1 += 4;
    x += 2;
  }
}
1739 
// Extract the U and V samples of one YUY2 row into separate rows (422 -> 422).
void YUY2ToUV422Row_C(const uint8* src_yuy2,
                      uint8* dst_u, uint8* dst_v, int width) {
  int x = 0;
  // Each 4-byte group Y0 U Y1 V covers two pixels and yields one U and one V.
  while (x < width) {
    *dst_u++ = src_yuy2[1];
    *dst_v++ = src_yuy2[3];
    src_yuy2 += 4;
    x += 2;
  }
}
1753 
// Extract the Y samples of one YUY2 row (bytes 0 and 2 of every 4-byte group).
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
  int x = 0;
  while (x + 1 < width) {
    dst_y[x] = src_yuy2[0];
    dst_y[x + 1] = src_yuy2[2];
    src_yuy2 += 4;
    x += 2;
  }
  if (width & 1) {
    // Odd width: only the first Y of the last group is used.
    dst_y[width - 1] = src_yuy2[0];
  }
}
1767 
// Produce one row of U and one row of V from two rows of UYVY (422 -> 420).
// The second row starts src_stride_uyvy bytes after the first; vertically
// adjacent chroma samples are averaged with rounding.
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
                   uint8* dst_u, uint8* dst_v, int width) {
  const uint8* row0 = src_uyvy;
  const uint8* row1 = src_uyvy + src_stride_uyvy;
  int x = 0;
  // Each 4-byte group U Y0 V Y1 covers two pixels and yields one U and one V.
  while (x < width) {
    *dst_u++ = (uint8)((row0[0] + row1[0] + 1) >> 1);
    *dst_v++ = (uint8)((row0[2] + row1[2] + 1) >> 1);
    row0 += 4;
    row1 += 4;
    x += 2;
  }
}
1781 
// Extract the U and V samples of one UYVY row into separate rows (422 -> 422).
void UYVYToUV422Row_C(const uint8* src_uyvy,
                      uint8* dst_u, uint8* dst_v, int width) {
  int x = 0;
  // Each 4-byte group U Y0 V Y1 covers two pixels and yields one U and one V.
  while (x < width) {
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;
    x += 2;
  }
}
1795 
// Extract the Y samples of one UYVY row (bytes 1 and 3 of every 4-byte group).
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
  int x = 0;
  while (x + 1 < width) {
    dst_y[x] = src_uyvy[1];
    dst_y[x + 1] = src_uyvy[3];
    src_uyvy += 4;
    x += 2;
  }
  if (width & 1) {
    // Odd width: only the first Y of the last group is used.
    dst_y[width - 1] = src_uyvy[1];
  }
}
1809 
// Blend one 4-byte pixel of |fg| over |bg| and store it to |dst|.
// Per color channel: fg + ((256 - alpha) * bg >> 8), where alpha is fg[3].
// The sum is saturated to 255 so it does not wrap when stored to uint8.
// The output alpha byte is always 255.
// All inputs are read before anything is written, so |dst| may be |fg| or
// |bg|.
static __inline void BlendPixel_C(const uint8* fg, const uint8* bg,
                                  uint8* dst) {
  const uint32 inv_a = 256u - fg[3];
  const uint32 b = fg[0] + ((inv_a * bg[0]) >> 8);
  const uint32 g = fg[1] + ((inv_a * bg[1]) >> 8);
  const uint32 r = fg[2] + ((inv_a * bg[2]) >> 8);
  dst[0] = (uint8)(b > 255u ? 255u : b);
  dst[1] = (uint8)(g > 255u ? 255u : g);
  dst[2] = (uint8)(r > 255u ? 255u : r);
  dst[3] = 255u;
}

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// This code is meant to mimic the SSSE3 version for better testability, which
// is why the per-channel sum saturates instead of wrapping.
void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
                    uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    BlendPixel_C(src_argb0, src_argb1, dst_argb);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
// Scale 8-bit channel f by 8-bit alpha a: both are widened to 16 bits by
// replicating the byte, multiplied, and the top 8 bits of the 32-bit product
// are kept.
#define ATTENUATE(f, a) ((((a) | ((a) << 8)) * ((f) | ((f) << 8))) >> 24)

// Multiply source RGB by alpha and store to destination; alpha is copied.
// This code mimics the SSSE3 version for better testability.
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int remaining = width;
  while (remaining > 1) {
    int offset;
    // Two pixels per iteration; each pixel is fully read before it is written.
    for (offset = 0; offset < 8; offset += 4) {
      const uint32 blue = src_argb[offset + 0];
      const uint32 green = src_argb[offset + 1];
      const uint32 red = src_argb[offset + 2];
      const uint32 alpha = src_argb[offset + 3];
      dst_argb[offset + 0] = ATTENUATE(blue, alpha);
      dst_argb[offset + 1] = ATTENUATE(green, alpha);
      dst_argb[offset + 2] = ATTENUATE(red, alpha);
      dst_argb[offset + 3] = alpha;
    }
    src_argb += 8;
    dst_argb += 8;
    remaining -= 2;
  }

  if (width & 1) {
    // Odd width: one trailing pixel.
    const uint32 blue = src_argb[0];
    const uint32 green = src_argb[1];
    const uint32 red = src_argb[2];
    const uint32 alpha = src_argb[3];
    dst_argb[0] = ATTENUATE(blue, alpha);
    dst_argb[1] = ATTENUATE(green, alpha);
    dst_argb[2] = ATTENUATE(red, alpha);
    dst_argb[3] = alpha;
  }
}
#undef ATTENUATE
1901 
// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values, e.g. 125.
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
// Entries 0, 1 and 255 are written out by hand instead of using T().
#define T(a) (0x01000000 + (0x10000 / (a)))
#define T4(a) T(a), T((a) + 1), T((a) + 2), T((a) + 3)
#define T16(a) T4(a), T4((a) + 4), T4((a) + 8), T4((a) + 12)
const uint32 fixed_invtbl8[256] = {
  // 0x00 .. 0x0f
  0x01000000, 0x0100ffff, T(0x02), T(0x03), T4(0x04), T4(0x08), T4(0x0c),
  // 0x10 .. 0xef
  T16(0x10), T16(0x20), T16(0x30), T16(0x40), T16(0x50), T16(0x60), T16(0x70),
  T16(0x80), T16(0x90), T16(0xa0), T16(0xb0), T16(0xc0), T16(0xd0), T16(0xe0),
  // 0xf0 .. 0xff
  T4(0xf0), T4(0xf4), T4(0xf8), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
#undef T16
#undef T4
#undef T
1943 
ARGBUnattenuateRow_C(const uint8 * src_argb,uint8 * dst_argb,int width)1944 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1945   int i;
1946   for (i = 0; i < width; ++i) {
1947     uint32 b = src_argb[0];
1948     uint32 g = src_argb[1];
1949     uint32 r = src_argb[2];
1950     const uint32 a = src_argb[3];
1951     const uint32 ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
1952     b = (b * ia) >> 8;
1953     g = (g * ia) >> 8;
1954     r = (r * ia) >> 8;
1955     // Clamping should not be necessary but is free in assembly.
1956     dst_argb[0] = clamp255(b);
1957     dst_argb[1] = clamp255(g);
1958     dst_argb[2] = clamp255(r);
1959     dst_argb[3] = a;
1960     src_argb += 4;
1961     dst_argb += 4;
1962   }
1963 }
1964 
// For each of the 4 byte channels, keep a running sum along |row| and write
// that sum plus the value at the same position in |previous_cumsum| to
// |cumsum|. |width| is in 4-channel pixels.
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
                               const int32* previous_cumsum, int width) {
  int32 running[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    const int base = x * 4;
    int c;
    for (c = 0; c < 4; ++c) {
      running[c] += row[base + c];
    }
    for (c = 0; c < 4; ++c) {
      cumsum[base + c] = running[c] + previous_cumsum[base + c];
    }
  }
}
1980 
// For |count| pixels of 4 channels each, compute
// bl[w + c] + tl[c] - bl[c] - tl[w + c], scale it by 1 / area in float and
// store the result truncated to uint8.
void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
                                int w, int area, uint8* dst, int count) {
  const float inv_area = 1.0f / area;
  int i;
  for (i = 0; i < count; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8)((bl[w + c] + tl[c] - bl[c] - tl[w + c]) * inv_area);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
1995 
// Copy pixels from rotated source to destination row with a slope.
// uv_dudv[0..1] is the starting (u, v) source coordinate and uv_dudv[2..3] is
// the step added to it after every output pixel. The coordinate is truncated
// to int and the 4-byte pixel at row v, column u is copied.
// No bounds checking is done on the source coordinate.
// The pixel is moved with memcpy through a local uint32, so neither buffer
// has to be 4-byte aligned and no uint8 storage is accessed through a uint32
// lvalue.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
                     uint8* dst_argb, const float* uv_dudv, int width) {
  int i;
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    const int x = (int)(uv[0]);
    const int y = (int)(uv[1]);
    uint32 pixel;
    memcpy(&pixel, src_argb + y * src_argb_stride + x * 4, sizeof(pixel));
    memcpy(dst_argb, &pixel, sizeof(pixel));
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}
2016 
// Average two rows into one with rounding. The second row starts
// src_uv_stride bytes after the first.
static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
                      uint8* dst_uv, int pix) {
  const uint8* below = src_uv + src_uv_stride;
  int x;
  for (x = 0; x < pix; ++x) {
    dst_uv[x] = (uint8)((src_uv[x] + below[x] + 1) >> 1);
  }
}
2025 
// 16-bit variant: average two rows into one with rounding. The second row
// starts src_uv_stride elements after the first.
static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
                         uint16* dst_uv, int pix) {
  const uint16* below = src_uv + src_uv_stride;
  int x;
  for (x = 0; x < pix; ++x) {
    dst_uv[x] = (uint16)((src_uv[x] + below[x] + 1) >> 1);
  }
}
2033 
// C version 2x2 -> 2x1.
// Blend the row at src_ptr with the row src_stride bytes after it.
// source_y_fraction is the weight of the second row out of 256:
// 0 copies the first row, 128 uses the rounded average from HalfRow_C, and
// any other value computes (row0 * (256 - f) + row1 * f) >> 8 per byte.
void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      int width, int source_y_fraction) {
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  const uint8* next_row = src_ptr + src_stride;
  int x;
  if (source_y_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (source_y_fraction == 128) {
    HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
    return;
  }
  // Two bytes per iteration.
  for (x = 0; x + 1 < width; x += 2) {
    dst_ptr[x] = (src_ptr[x] * weight0 + next_row[x] * weight1) >> 8;
    dst_ptr[x + 1] =
        (src_ptr[x + 1] * weight0 + next_row[x + 1] * weight1) >> 8;
  }
  if (width & 1) {
    // Odd width: x now indexes the one remaining byte.
    dst_ptr[x] = (src_ptr[x] * weight0 + next_row[x] * weight1) >> 8;
  }
}
2061 
// 16-bit variant of the 2x2 -> 2x1 row blend.
// Blend the row at src_ptr with the row src_stride elements after it.
// source_y_fraction is the weight of the second row out of 256:
// 0 copies the first row, 128 uses the rounded average from HalfRow_16_C, and
// any other value computes (row0 * (256 - f) + row1 * f) >> 8 per sample.
void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
                         ptrdiff_t src_stride,
                         int width, int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint16* src_ptr1 = src_ptr + src_stride;
  int x;
  if (source_y_fraction == 0) {
    // Byte count is computed in size_t so it cannot overflow signed int.
    memcpy(dst_ptr, src_ptr, (size_t)width * sizeof(*src_ptr));
    return;
  }
  if (source_y_fraction == 128) {
    HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);
    return;
  }
  // Two samples per iteration.
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    // Odd width: one trailing sample.
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
  }
}
2088 
// Reorder the bytes of every 4-byte pixel: output byte i of a pixel is input
// byte shuffler[i] of the same pixel. Only the first 4 shuffler values are
// used.
void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
                      const uint8* shuffler, int pix) {
  const int pick0 = shuffler[0];
  const int pick1 = shuffler[1];
  const int pick2 = shuffler[2];
  const int pick3 = shuffler[3];
  int remaining;
  for (remaining = pix; remaining > 0; --remaining) {
    // Gather all four bytes before storing, to support in-place conversion.
    const uint8 out0 = src_argb[pick0];
    const uint8 out1 = src_argb[pick1];
    const uint8 out2 = src_argb[pick2];
    const uint8 out3 = src_argb[pick3];
    dst_argb[0] = out0;
    dst_argb[1] = out1;
    dst_argb[2] = out2;
    dst_argb[3] = out3;
    src_argb += 4;
    dst_argb += 4;
  }
}
2112 
// Pack planar Y, U, V rows into YUY2: 4-byte groups Y0 U Y1 V, one group per
// two pixels. For an odd width the last group's second Y is written as 0.
void I422ToYUY2Row_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int remaining = width;
  while (remaining > 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = *src_u++;
    dst_frame[2] = src_y[1];
    dst_frame[3] = *src_v++;
    dst_frame += 4;
    src_y += 2;
    remaining -= 2;
  }
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;  // No second pixel.
    dst_frame[3] = src_v[0];
  }
}
2135 
// Pack planar Y, U, V rows into UYVY: 4-byte groups U Y0 V Y1, one group per
// two pixels. For an odd width the last group's second Y is written as 0.
void I422ToUYVYRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int remaining = width;
  while (remaining > 1) {
    dst_frame[0] = *src_u++;
    dst_frame[1] = src_y[0];
    dst_frame[2] = *src_v++;
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    remaining -= 2;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;  // No second pixel.
  }
}
2158 
// Largest number of pixels the two-step wrappers below convert per pass; it
// sizes their temporary row buffers.
#define MAXTWIDTH 2048

#if (!defined(_MSC_VER) || defined(__clang__)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// Visual C (without clang) gets an asm version in row_win.cc; every other
// compiler uses this two-step wrapper: I422 -> ARGB into a temporary row,
// then ARGB -> RGB565, at most MAXTWIDTH pixels at a time.
void I422ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_u,
                           const uint8* src_v,
                           uint8* dst_rgb565,
                           int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, chunk);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U/V sample per two pixels.
    src_v += chunk / 2;
    dst_rgb565 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2183 
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
// Two-step wrapper: I422 -> ARGB into a temporary row, then ARGB -> ARGB1555,
// at most MAXTWIDTH pixels at a time.
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb1555,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, chunk);
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U/V sample per two pixels.
    src_v += chunk / 2;
    dst_argb1555 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2204 
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
// Two-step wrapper: I422 -> ARGB into a temporary row, then ARGB -> ARGB4444,
// at most MAXTWIDTH pixels at a time.
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb4444,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, chunk);
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U/V sample per two pixels.
    src_v += chunk / 2;
    dst_argb4444 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2225 
#if defined(HAS_NV12TORGB565ROW_SSSE3)
// Two-step wrapper: NV12 -> ARGB into a temporary row, then ARGB -> RGB565,
// at most MAXTWIDTH pixels at a time.
void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv,
                           uint8* dst_rgb565, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, chunk);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_uv += chunk;  // Interleaved plane advances as many bytes as Y.
    dst_rgb565 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2242 
#if defined(HAS_NV21TORGB565ROW_SSSE3)
// Two-step wrapper: NV21 -> ARGB into a temporary row, then ARGB -> RGB565,
// at most MAXTWIDTH pixels at a time.
void NV21ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_vu,
                           uint8* dst_rgb565, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, chunk);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_vu += chunk;  // Interleaved plane advances as many bytes as Y.
    dst_rgb565 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2259 
#if defined(HAS_YUY2TOARGBROW_SSSE3)
// Two-step wrapper: split YUY2 into temporary Y, U and V rows, then convert
// those I422 rows to ARGB, at most MAXTWIDTH pixels at a time.
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) {
  // Row buffers for intermediate YUV pixels.
  SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
  SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
  SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, chunk);
    YUY2ToYRow_SSE2(src_yuy2, row_y, chunk);
    I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, chunk);
    src_yuy2 += chunk * 2;  // 2 source bytes per pixel.
    dst_argb += chunk * 4;  // 4 output bytes per pixel.
    remaining -= chunk;
  }
}
#endif
2277 
#if defined(HAS_UYVYTOARGBROW_SSSE3)
// Two-step wrapper: split UYVY into temporary Y, U and V rows, then convert
// those I422 rows to ARGB, at most MAXTWIDTH pixels at a time.
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) {
  // Row buffers for intermediate YUV pixels.
  SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
  SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
  SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, chunk);
    UYVYToYRow_SSE2(src_uyvy, row_y, chunk);
    I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, chunk);
    src_uyvy += chunk * 2;  // 2 source bytes per pixel.
    dst_argb += chunk * 4;  // 4 output bytes per pixel.
    remaining -= chunk;
  }
}
#endif  // HAS_UYVYTOARGBROW_SSSE3
2295 
#if defined(HAS_I422TORGB565ROW_AVX2)
// Two-step wrapper: I422 -> ARGB into a temporary row, then ARGB -> RGB565,
// at most MAXTWIDTH pixels at a time.
void I422ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          uint8* dst_rgb565,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, chunk);
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U/V sample per two pixels.
    src_v += chunk / 2;
    dst_rgb565 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2315 
#if defined(HAS_I422TOARGB1555ROW_AVX2)
// Two-step wrapper: I422 -> ARGB into a temporary row, then ARGB -> ARGB1555,
// at most MAXTWIDTH pixels at a time.
void I422ToARGB1555Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb1555,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, chunk);
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U/V sample per two pixels.
    src_v += chunk / 2;
    dst_argb1555 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2336 
#if defined(HAS_I422TOARGB4444ROW_AVX2)
// Two-step wrapper: I422 -> ARGB into a temporary row, then ARGB -> ARGB4444,
// at most MAXTWIDTH pixels at a time.
void I422ToARGB4444Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb4444,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, chunk);
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U/V sample per two pixels.
    src_v += chunk / 2;
    dst_argb4444 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2357 
#if defined(HAS_I422TORGB24ROW_AVX2)
// Two-step wrapper: I422 -> ARGB into a temporary row, then ARGB -> RGB24,
// at most MAXTWIDTH pixels at a time. The second step still uses the SSSE3
// row function.
void I422ToRGB24Row_AVX2(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_rgb24,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, chunk);
    // TODO(fbarchard): ARGBToRGB24Row_AVX2
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U/V sample per two pixels.
    src_v += chunk / 2;
    dst_rgb24 += chunk * 3;  // 3 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2379 
#if defined(HAS_I422TORAWROW_AVX2)
// Two-step wrapper: I422 -> ARGB into a temporary row, then ARGB -> RAW,
// at most MAXTWIDTH pixels at a time. The second step still uses the SSSE3
// row function.
void I422ToRAWRow_AVX2(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_raw,
                       int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, chunk);
    // TODO(fbarchard): ARGBToRAWRow_AVX2
    ARGBToRAWRow_SSSE3(row, dst_raw, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U/V sample per two pixels.
    src_v += chunk / 2;
    dst_raw += chunk * 3;  // 3 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2401 
#if defined(HAS_NV12TORGB565ROW_AVX2)
// Two-step wrapper: NV12 -> ARGB into a temporary row, then ARGB -> RGB565,
// at most MAXTWIDTH pixels at a time.
void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv,
                          uint8* dst_rgb565, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, chunk);
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_uv += chunk;  // Interleaved plane advances as many bytes as Y.
    dst_rgb565 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2418 
#if defined(HAS_NV21TORGB565ROW_AVX2)
// Two-step wrapper: NV21 -> ARGB into a temporary row, then ARGB -> RGB565,
// at most MAXTWIDTH pixels at a time.
void NV21ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_vu,
                          uint8* dst_rgb565, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, chunk);
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_vu += chunk;  // Interleaved plane advances as many bytes as Y.
    dst_rgb565 += chunk * 2;  // 2 bytes per output pixel.
    remaining -= chunk;
  }
}
#endif
2435 
2436 #if defined(HAS_YUY2TOARGBROW_AVX2)
// Converts a row of packed YUY2 to ARGB in two steps: the packed input is
// first split into separate Y, U and V scratch rows, which are then fed to
// the I422 -> ARGB row function. The row is handled in tiles of at most
// MAXTWIDTH pixels so the scratch rows have a fixed size.
void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, int width) {
  // Scratch rows: one Y byte per pixel, one U and one V byte per two pixels.
  SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
  SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
  SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
  int remaining;
  for (remaining = width; remaining > 0;) {
    // Never hand more than MAXTWIDTH pixels to the row functions at once.
    const int tile = (remaining < MAXTWIDTH) ? remaining : MAXTWIDTH;
    YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, tile);
    YUY2ToYRow_AVX2(src_yuy2, row_y, tile);
    I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, tile);
    // Packed input advances two bytes per pixel, ARGB output four.
    src_yuy2 += tile * 2;
    dst_argb += tile * 4;
    remaining -= tile;
  }
}
2452 #endif
2453 
2454 #if defined(HAS_UYVYTOARGBROW_AVX2)
// Converts a row of packed UYVY to ARGB in two steps: the packed input is
// first split into separate Y, U and V scratch rows, which are then fed to
// the I422 -> ARGB row function. The row is handled in tiles of at most
// MAXTWIDTH pixels so the scratch rows have a fixed size.
void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, int width) {
  // Scratch rows: one Y byte per pixel, one U and one V byte per two pixels.
  SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
  SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
  SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
  int remaining;
  for (remaining = width; remaining > 0;) {
    // Never hand more than MAXTWIDTH pixels to the row functions at once.
    const int tile = (remaining < MAXTWIDTH) ? remaining : MAXTWIDTH;
    UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, tile);
    UYVYToYRow_AVX2(src_uyvy, row_y, tile);
    I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, tile);
    // Packed input advances two bytes per pixel, ARGB output four.
    src_uyvy += tile * 2;
    dst_argb += tile * 4;
    remaining -= tile;
  }
}
#endif  // defined(HAS_UYVYTOARGBROW_AVX2)
2471 
// Applies an independent cubic polynomial to each channel of every pixel.
// For channel c (0 = B, 1 = G, 2 = R, 3 = A) with input value v the result is
//   poly[c] + poly[c + 4] * v + poly[c + 8] * v^2 + poly[c + 12] * v^3
// evaluated in float, converted to int32 and clamped to 0..255 by Clamp().
// Reads and writes 4 bytes per pixel for width pixels; poly must provide 16
// floats.
void ARGBPolynomialRow_C(const uint8* src_argb,
                         uint8* dst_argb, const float* poly,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    float in[4];
    float out[4];
    int c;
    // Fetch every channel of the pixel before any output byte is stored.
    for (c = 0; c < 4; ++c) {
      in[c] = (float)(src_argb[c]);
    }
    // Accumulate the terms in ascending order of degree.
    for (c = 0; c < 4; ++c) {
      const float v = in[c];
      const float v2 = v * v;
      const float v3 = v2 * v;
      float acc = poly[c] + poly[c + 4] * v;
      acc += poly[c + 8] * v2;
      acc += poly[c + 12] * v3;
      out[c] = acc;
    }
    // Convert to integer and clamp into the 8 bit range.
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = Clamp((int32)(out[c]));
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
2510 
// Remaps one ARGB pixel: the weighted sum of its B, G and R bytes selects a
// 256 byte row inside luma, and each colour byte is replaced by the entry of
// that row indexed by its own value. The alpha byte is copied unchanged.
static __inline void LumaColorTablePixel(const uint8* src_argb,
                                         uint8* dst_argb,
                                         const uint8* luma,
                                         uint32 bc, uint32 gc, uint32 rc) {
  // The 0x7F00 mask keeps bits 8..14 of the sum, giving a byte offset that is
  // a multiple of 256 in the range 0..0x7F00.
  const uint32 row_offset =
      (src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u;
  const uint8* table = luma + row_offset;
  dst_argb[0] = table[src_argb[0]];
  dst_argb[1] = table[src_argb[1]];
  dst_argb[2] = table[src_argb[2]];
  dst_argb[3] = src_argb[3];
}

// Applies a luminance dependent colour table to a row of ARGB pixels.
// lumacoeff packs three 8 bit weights: bits 0..7 for B, bits 8..15 for G and
// bits 16..23 for R. Lookups can reach offsets 0..0x7FFF of luma.
// Pixels are processed two at a time, followed by one more when width is odd.
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
                             const uint8* luma, uint32 lumacoeff) {
  const uint32 weight_b = lumacoeff & 0xff;
  const uint32 weight_g = (lumacoeff >> 8) & 0xff;
  const uint32 weight_r = (lumacoeff >> 16) & 0xff;
  int x = 0;

  while (x < width - 1) {
    LumaColorTablePixel(src_argb, dst_argb, luma,
                        weight_b, weight_g, weight_r);
    LumaColorTablePixel(src_argb + 4, dst_argb + 4, luma,
                        weight_b, weight_g, weight_r);
    src_argb += 8;
    dst_argb += 8;
    x += 2;
  }
  if ((width & 1) != 0) {
    // Trailing pixel of an odd width row.
    LumaColorTablePixel(src_argb, dst_argb, luma,
                        weight_b, weight_g, weight_r);
  }
}
2546 
// Copies only the alpha byte (offset 3 of every 4 byte pixel) from src to
// dst, leaving the other three bytes of each destination pixel untouched.
// Pixels are handled two at a time, followed by one more when width is odd.
void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int x = 0;
  while (x < width - 1) {
    dst[3] = src[3];  // Alpha of the first pixel in the pair.
    dst[7] = src[7];  // Alpha of the second pixel in the pair.
    src += 8;
    dst += 8;
    x += 2;
  }
  if ((width & 1) != 0) {
    // Trailing pixel of an odd width row.
    dst[3] = src[3];
  }
}
2559 
// Writes one source byte per pixel into the alpha byte (offset 3 of every
// 4 byte pixel) of dst, leaving the other three bytes of each destination
// pixel untouched. Pixels are handled two at a time, followed by one more
// when width is odd.
void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int x = 0;
  while (x < width - 1) {
    dst[3] = src[0];  // First pixel of the pair.
    dst[7] = src[1];  // Second pixel of the pair.
    src += 2;
    dst += 8;
    x += 2;
  }
  if ((width & 1) != 0) {
    // Trailing pixel of an odd width row.
    dst[3] = src[0];
  }
}
2572 
2573 #ifdef __cplusplus
2574 }  // extern "C"
2575 }  // namespace libyuv
2576 #endif
2577