1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <string.h>  // For memcpy and memset.
14 
15 #include "libyuv/basic_types.h"
16 
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21 
22 // llvm x86 is poor at ternary operator, so use branchless min/max.
23 
24 #define USE_BRANCHLESS 1
25 #if USE_BRANCHLESS
// Branchless max(v, 0). Relies on >> of a negative int32 being an arithmetic
// shift, so the mask below is all ones when v > 0 and zero otherwise.
static __inline int32 clamp0(int32 v) {
  const int32 keep_mask = -v >> 31;
  return keep_mask & v;
}
29 
// Branchless upper clamp to 255 for non-negative v. The mask is all ones when
// v > 255 (arithmetic shift assumed); the final & 255 keeps only the low byte.
static __inline int32 clamp255(int32 v) {
  const int32 over_mask = (255 - v) >> 31;
  return (over_mask | v) & 255;
}
33 
// Clamps val to [0, 255]: clamp0() removes negatives first, then clamp255()
// limits the top end.
static __inline uint32 Clamp(int32 val) {
  return (uint32)clamp255(clamp0(val));
}
38 
// Branchless absolute value. sign_mask is -1 for negative v (arithmetic shift
// assumed) and 0 otherwise; adding then XOR-ing it negates negative inputs.
static __inline uint32 Abs(int32 v) {
  const int sign_mask = v >> 31;
  return (uint32)((v + sign_mask) ^ sign_mask);
}
43 #else  // USE_BRANCHLESS
// Returns v, or 0 when v is negative.
static __inline int32 clamp0(int32 v) {
  if (v < 0) {
    return 0;
  }
  return v;
}
47 
// Returns v, or 255 when v exceeds 255. Negative values pass through.
static __inline int32 clamp255(int32 v) {
  if (v > 255) {
    return 255;
  }
  return v;
}
51 
// Clamps val to [0, 255] by applying clamp0() and then clamp255().
static __inline uint32 Clamp(int32 val) {
  return (uint32)clamp255(clamp0(val));
}
56 
// Absolute value of v, returned as unsigned.
static __inline uint32 Abs(int32 v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
60 #endif  // USE_BRANCHLESS
61 
// WRITEWORD(p, v) stores the 32-bit value v at p, least significant byte
// first. p does not need to be 4-byte aligned.
#ifdef LIBYUV_LITTLE_ENDIAN
// Host byte order already matches the stored layout. memcpy replaces the
// former *(uint32*)(p) = v store, which was a misaligned, type-punned access.
static __inline void WRITEWORD(uint8* p, uint32 v) {
  memcpy(p, &v, sizeof(v));
}
#else
static __inline void WRITEWORD(uint8* p, uint32 v) {
  p[0] = (uint8)(v & 255);
  p[1] = (uint8)((v >> 8) & 255);
  p[2] = (uint8)((v >> 16) & 255);
  p[3] = (uint8)((v >> 24) & 255);
}
#endif
72 
// Expands width 3-byte pixels into 4-byte pixels: the three source bytes are
// copied in order and a fourth byte of 255 is appended.
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
  int remaining = width;
  while (remaining > 0) {
    const uint8 c0 = src_rgb24[0];
    const uint8 c1 = src_rgb24[1];
    const uint8 c2 = src_rgb24[2];
    src_rgb24 += 3;
    dst_argb[0] = c0;
    dst_argb[1] = c1;
    dst_argb[2] = c2;
    dst_argb[3] = 255u;
    dst_argb += 4;
    --remaining;
  }
}
87 
// Expands width 3-byte pixels into 4-byte pixels, reversing the order of the
// three source bytes and appending a fourth byte of 255.
void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
  int remaining = width;
  while (remaining > 0) {
    const uint8 first = src_raw[0];
    const uint8 middle = src_raw[1];
    const uint8 last = src_raw[2];
    src_raw += 3;
    dst_argb[0] = last;
    dst_argb[1] = middle;
    dst_argb[2] = first;
    dst_argb[3] = 255u;
    dst_argb += 4;
    --remaining;
  }
}
102 
// Expands width 2-byte RGB565 pixels (low byte first: blue in bits 0-4, green
// in bits 5-10, red in bits 11-15) to 4-byte B, G, R, 255 pixels. Each field
// is widened to 8 bits by replicating its top bits into the low bits.
void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const unsigned int pixel = src_rgb565[0] | (src_rgb565[1] << 8);
    const unsigned int b5 = pixel & 0x1f;
    const unsigned int g6 = (pixel >> 5) & 0x3f;
    const unsigned int r5 = pixel >> 11;
    dst_argb[0] = (uint8)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8)((g6 << 2) | (g6 >> 4));
    dst_argb[2] = (uint8)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = 255u;
    src_rgb565 += 2;
    dst_argb += 4;
  }
}
117 
// Expands width 2-byte ARGB1555 pixels (low byte first: blue in bits 0-4,
// green in bits 5-9, red in bits 10-14, alpha in bit 15) to 4-byte B, G, R, A
// pixels. Colour fields are widened by bit replication; the alpha bit becomes
// 0 or 255.
void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const unsigned int pixel = src_argb1555[0] | (src_argb1555[1] << 8);
    const unsigned int b5 = pixel & 0x1f;
    const unsigned int g5 = (pixel >> 5) & 0x1f;
    const unsigned int r5 = (pixel >> 10) & 0x1f;
    const unsigned int a1 = pixel >> 15;
    dst_argb[0] = (uint8)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8)((g5 << 3) | (g5 >> 2));
    dst_argb[2] = (uint8)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = a1 ? 255u : 0u;
    src_argb1555 += 2;
    dst_argb += 4;
  }
}
134 
// Expands width 2-byte ARGB4444 pixels to 4-byte B, G, R, A pixels. Byte 0
// holds blue (low nibble) and green (high nibble); byte 1 holds red and alpha.
// Multiplying a nibble by 0x11 copies it into both halves of the output byte.
void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const unsigned int gb = src_argb4444[0];
    const unsigned int ar = src_argb4444[1];
    dst_argb[0] = (uint8)((gb & 0x0f) * 0x11);
    dst_argb[1] = (uint8)((gb >> 4) * 0x11);
    dst_argb[2] = (uint8)((ar & 0x0f) * 0x11);
    dst_argb[3] = (uint8)((ar >> 4) * 0x11);
    src_argb4444 += 2;
    dst_argb += 4;
  }
}
151 
// Packs width 4-byte pixels into 3-byte pixels by copying the first three
// bytes of each source pixel in order and dropping the fourth.
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining = width;
  while (remaining > 0) {
    const uint8 c0 = src_argb[0];
    const uint8 c1 = src_argb[1];
    const uint8 c2 = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = c0;
    dst_rgb[1] = c1;
    dst_rgb[2] = c2;
    dst_rgb += 3;
    --remaining;
  }
}
165 
// Packs width 4-byte pixels into 3-byte pixels, writing the first three bytes
// of each source pixel in reverse order and dropping the fourth. All three
// bytes are read before any is written.
void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining = width;
  while (remaining > 0) {
    const uint8 c0 = src_argb[0];
    const uint8 c1 = src_argb[1];
    const uint8 c2 = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = c2;
    dst_rgb[1] = c1;
    dst_rgb[2] = c0;
    dst_rgb += 3;
    --remaining;
  }
}
179 
// Packs width 4-byte B, G, R, A pixels into 2-byte RGB565 pixels (blue in
// bits 0-4, green in bits 5-10, red in bits 11-15), keeping the top bits of
// each channel and dropping alpha.
//
// Each 16-bit pixel is stored low byte first with plain byte writes, which is
// the layout RGB565ToARGBRow_C reads. This avoids the misaligned, host-endian
// uint16 store the odd-width tail used and the signed-overflowing r1 << 27 of
// the two-pixel packing. Nothing is written when width <= 0.
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const unsigned int b = src_argb[0] >> 3;
    const unsigned int g = src_argb[1] >> 2;
    const unsigned int r = src_argb[2] >> 3;
    const unsigned int rgb565 = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8)(rgb565 & 0xff);
    dst_rgb[1] = (uint8)(rgb565 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
201 
// Packs width 4-byte B, G, R, A pixels into 2-byte ARGB1555 pixels (blue in
// bits 0-4, green in bits 5-9, red in bits 10-14, top alpha bit in bit 15).
//
// Each 16-bit pixel is stored low byte first with plain byte writes, which is
// the layout ARGB1555ToARGBRow_C reads. This replaces the *(uint32*) and
// *(uint16*) stores, which were misaligned-unsafe and host-endian, and
// removes the undefined a1 << 31 on a signed int. Nothing is written when
// width <= 0.
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const unsigned int b = src_argb[0] >> 3;
    const unsigned int g = src_argb[1] >> 3;
    const unsigned int r = src_argb[2] >> 3;
    const unsigned int a = src_argb[3] >> 7;
    const unsigned int argb1555 = b | (g << 5) | (r << 10) | (a << 15);
    dst_rgb[0] = (uint8)(argb1555 & 0xff);
    dst_rgb[1] = (uint8)(argb1555 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
228 
// Packs width 4-byte B, G, R, A pixels into 2-byte ARGB4444 pixels, keeping
// the high nibble of each channel: byte 0 holds blue (low nibble) and green
// (high nibble), byte 1 holds red and alpha.
//
// Bytes are written individually in the layout ARGB4444ToARGBRow_C reads.
// This replaces the *(uint32*) and *(uint16*) stores, which were
// misaligned-unsafe and host-endian, and removes the undefined a1 << 28 on a
// signed int. Nothing is written when width <= 0.
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const unsigned int b = src_argb[0] >> 4;
    const unsigned int g = src_argb[1] >> 4;
    const unsigned int r = src_argb[2] >> 4;
    const unsigned int a = src_argb[3] >> 4;
    dst_rgb[0] = (uint8)(b | (g << 4));
    dst_rgb[1] = (uint8)(r | (a << 4));
    dst_rgb += 2;
    src_argb += 4;
  }
}
255 
// Luma from 8-bit R, G, B in 8.8 fixed point. 0x1080 adds the offset of 16
// plus one half (0x80) for rounding before the final shift.
static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return (weighted + 0x1080) >> 8;
}
259 
// U chroma from 8-bit R, G, B in 8.8 fixed point. 0x8080 adds the 128 centre
// plus one half for rounding before the final shift.
static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * b - 74 * g - 38 * r;
  return (weighted + 0x8080) >> 8;
}
// V chroma from 8-bit R, G, B in 8.8 fixed point. 0x8080 adds the 128 centre
// plus one half for rounding before the final shift.
static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return (weighted + 0x8080) >> 8;
}
266 
// MAKEROWY(NAME, R, G, B, BPP) defines two row functions for a packed pixel
// layout whose red, green and blue bytes sit at offsets R, G and B inside each
// BPP-byte pixel:
//   NAME##ToYRow_C  - writes one RGBToY() result per pixel.
//   NAME##ToUVRow_C - sums each 2x2 pixel block taken from the row at src_rgb0
//                     and the row src_stride_rgb bytes after it, divides by 4
//                     (truncating), and writes one RGBToU()/RGBToV() pair per
//                     block. When width is odd the last column averages the
//                     two vertically adjacent pixels.
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
  int i;                                                                       \
  for (i = 0; i < width; ++i) {                                                \
    *dst_y++ = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
    src_argb0 += BPP;                                                          \
  }                                                                            \
}                                                                              \
void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
                       uint8* dst_u, uint8* dst_v, int width) {                \
  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
  int i;                                                                       \
  for (i = 0; i < width - 1; i += 2) {                                         \
    const int sum_b = src_rgb0[B] + src_rgb0[B + BPP] +                        \
                      src_rgb1[B] + src_rgb1[B + BPP];                         \
    const int sum_g = src_rgb0[G] + src_rgb0[G + BPP] +                        \
                      src_rgb1[G] + src_rgb1[G + BPP];                         \
    const int sum_r = src_rgb0[R] + src_rgb0[R + BPP] +                        \
                      src_rgb1[R] + src_rgb1[R + BPP];                         \
    const uint8 avg_b = (uint8)(sum_b >> 2);                                   \
    const uint8 avg_g = (uint8)(sum_g >> 2);                                   \
    const uint8 avg_r = (uint8)(sum_r >> 2);                                   \
    *dst_u++ = RGBToU(avg_r, avg_g, avg_b);                                    \
    *dst_v++ = RGBToV(avg_r, avg_g, avg_b);                                    \
    src_rgb0 += BPP * 2;                                                       \
    src_rgb1 += BPP * 2;                                                       \
  }                                                                            \
  if (width & 1) {                                                             \
    const uint8 avg_b = (uint8)((src_rgb0[B] + src_rgb1[B]) >> 1);             \
    const uint8 avg_g = (uint8)((src_rgb0[G] + src_rgb1[G]) >> 1);             \
    const uint8 avg_r = (uint8)((src_rgb0[R] + src_rgb1[R]) >> 1);             \
    *dst_u = RGBToU(avg_r, avg_g, avg_b);                                      \
    *dst_v = RGBToV(avg_r, avg_g, avg_b);                                      \
  }                                                                            \
}

MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
310 
311 // JPeg uses a variation on BT.601-1 full range
312 // y =  0.29900 * r + 0.58700 * g + 0.11400 * b
313 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
314 // v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
315 // BT.601 Mpeg range uses:
316 // b 0.1016 * 255 = 25.908 = 25
317 // g 0.5078 * 255 = 129.489 = 129
318 // r 0.2578 * 255 = 65.739 = 66
319 // JPeg 8 bit Y (not used):
320 // b 0.11400 * 256 = 29.184 = 29
321 // g 0.58700 * 256 = 150.272 = 150
322 // r 0.29900 * 256 = 76.544 = 77
323 // JPeg 7 bit Y:
324 // b 0.11400 * 128 = 14.592 = 15
325 // g 0.58700 * 128 = 75.136 = 75
326 // r 0.29900 * 128 = 38.272 = 38
327 // JPeg 8 bit U:
328 // b  0.50000 * 255 = 127.5 = 127
329 // g -0.33126 * 255 = -84.4713 = -84
330 // r -0.16874 * 255 = -43.0287 = -43
331 // JPeg 8 bit V:
332 // b -0.08131 * 255 = -20.73405 = -20
333 // g -0.41869 * 255 = -106.76595 = -107
334 // r  0.50000 * 255 = 127.5 = 127
335 
// Full-range luma from 8-bit R, G, B using 7-bit weights (38 + 75 + 15 = 128).
// Adding 64 rounds to nearest before the shift by 7.
static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 38 * r + 75 * g + 15 * b;
  return (weighted + 64) >> 7;
}
339 
// Full-range U chroma from 8-bit R, G, B in 8.8 fixed point. 0x8080 adds the
// 128 centre plus one half for rounding before the final shift.
static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
// Full-range V chroma from 8-bit R, G, B in 8.8 fixed point. 0x8080 adds the
// 128 centre plus one half for rounding before the final shift.
static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}
346 
// Average of two values, rounded up on ties.
#define AVGB(a, b) (((a) + (b) + 1) >> 1)

// MAKEROWYJ(NAME, R, G, B, BPP) defines two row functions for a packed pixel
// layout whose red, green and blue bytes sit at offsets R, G and B inside each
// BPP-byte pixel:
//   NAME##ToYJRow_C  - writes one RGBToYJ() result per pixel.
//   NAME##ToUVJRow_C - for each 2x2 block taken from the row at src_rgb0 and
//                      the row src_stride_rgb bytes after it, averages each
//                      column vertically with AVGB, then averages the two
//                      column results, and writes one RGBToUJ()/RGBToVJ()
//                      pair. When width is odd the last column uses only the
//                      vertical average.
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {      \
  int i;                                                                       \
  for (i = 0; i < width; ++i) {                                                \
    *dst_y++ = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);              \
    src_argb0 += BPP;                                                          \
  }                                                                            \
}                                                                              \
void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,             \
                        uint8* dst_u, uint8* dst_v, int width) {               \
  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
  int i;                                                                       \
  for (i = 0; i < width - 1; i += 2) {                                         \
    const uint8 avg_b = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                   \
                             AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));      \
    const uint8 avg_g = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                   \
                             AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));      \
    const uint8 avg_r = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                   \
                             AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));      \
    *dst_u++ = RGBToUJ(avg_r, avg_g, avg_b);                                   \
    *dst_v++ = RGBToVJ(avg_r, avg_g, avg_b);                                   \
    src_rgb0 += BPP * 2;                                                       \
    src_rgb1 += BPP * 2;                                                       \
  }                                                                            \
  if (width & 1) {                                                             \
    const uint8 avg_b = AVGB(src_rgb0[B], src_rgb1[B]);                        \
    const uint8 avg_g = AVGB(src_rgb0[G], src_rgb1[G]);                        \
    const uint8 avg_r = AVGB(src_rgb0[R], src_rgb1[R]);                        \
    *dst_u = RGBToUJ(avg_r, avg_g, avg_b);                                     \
    *dst_v = RGBToVJ(avg_r, avg_g, avg_b);                                     \
  }                                                                            \
}

MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ
387 
// Writes one RGBToY() value per RGB565 pixel. Each pixel is read low byte
// first, split into 5/6/5-bit blue/green/red fields, and widened to 8 bits by
// bit replication before conversion.
void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const unsigned int pixel = src_rgb565[0] | (src_rgb565[1] << 8);
    const unsigned int b5 = pixel & 0x1f;
    const unsigned int g6 = (pixel >> 5) & 0x3f;
    const unsigned int r5 = pixel >> 11;
    const uint8 b8 = (uint8)((b5 << 3) | (b5 >> 2));
    const uint8 g8 = (uint8)((g6 << 2) | (g6 >> 4));
    const uint8 r8 = (uint8)((r5 << 3) | (r5 >> 2));
    *dst_y++ = RGBToY(r8, g8, b8);
    src_rgb565 += 2;
  }
}
402 
// Writes one RGBToY() value per ARGB1555 pixel. Each pixel is read low byte
// first, split into 5-bit blue/green/red fields (the alpha bit is ignored),
// and widened to 8 bits by bit replication before conversion.
void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const unsigned int pixel = src_argb1555[0] | (src_argb1555[1] << 8);
    const unsigned int b5 = pixel & 0x1f;
    const unsigned int g5 = (pixel >> 5) & 0x1f;
    const unsigned int r5 = (pixel >> 10) & 0x1f;
    const uint8 b8 = (uint8)((b5 << 3) | (b5 >> 2));
    const uint8 g8 = (uint8)((g5 << 3) | (g5 >> 2));
    const uint8 r8 = (uint8)((r5 << 3) | (r5 >> 2));
    *dst_y++ = RGBToY(r8, g8, b8);
    src_argb1555 += 2;
  }
}
417 
// Writes one RGBToY() value per ARGB4444 pixel. Blue and green come from the
// low and high nibbles of byte 0, red from the low nibble of byte 1; each
// nibble is duplicated into both halves of a byte (x * 0x11) before
// conversion. Alpha is ignored.
void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8 b8 = (uint8)((src_argb4444[0] & 0x0f) * 0x11);
    const uint8 g8 = (uint8)((src_argb4444[0] >> 4) * 0x11);
    const uint8 r8 = (uint8)((src_argb4444[1] & 0x0f) * 0x11);
    *dst_y++ = RGBToY(r8, g8, b8);
    src_argb4444 += 2;
  }
}
432 
// Writes one U and one V value per 2x2 block of RGB565 pixels taken from the
// row at src_rgb565 and the row src_stride_rgb565 bytes after it. Channel
// fields are summed (not divided), and the wider sums are then expanded to
// 8 bits by bit replication. When width is odd, the last column sums only the
// two vertically adjacent pixels.
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
                     uint8* dst_u, uint8* dst_v, int width) {
  const uint8* row0 = src_rgb565;
  const uint8* row1 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    const unsigned int p00 = row0[0] | (row0[1] << 8);
    const unsigned int p01 = row0[2] | (row0[3] << 8);
    const unsigned int p10 = row1[0] | (row1[1] << 8);
    const unsigned int p11 = row1[2] | (row1[3] << 8);
    // Four 5/6/5-bit fields sum to 7/8/7-bit values.
    unsigned int b = (p00 & 0x1f) + (p01 & 0x1f) + (p10 & 0x1f) + (p11 & 0x1f);
    const unsigned int g = ((p00 >> 5) & 0x3f) + ((p01 >> 5) & 0x3f) +
                           ((p10 >> 5) & 0x3f) + ((p11 >> 5) & 0x3f);
    unsigned int r = (p00 >> 11) + (p01 >> 11) + (p10 >> 11) + (p11 >> 11);
    b = (b << 1) | (b >> 6);  // 7 bits -> 8 bits.
    r = (r << 1) | (r >> 6);
    *dst_u++ = RGBToU((uint8)r, (uint8)g, (uint8)b);
    *dst_v++ = RGBToV((uint8)r, (uint8)g, (uint8)b);
    row0 += 4;
    row1 += 4;
  }
  if (width & 1) {
    const unsigned int top = row0[0] | (row0[1] << 8);
    const unsigned int bottom = row1[0] | (row1[1] << 8);
    // Two 5/6/5-bit fields sum to 6/7/6-bit values.
    unsigned int b = (top & 0x1f) + (bottom & 0x1f);
    unsigned int g = ((top >> 5) & 0x3f) + ((bottom >> 5) & 0x3f);
    unsigned int r = (top >> 11) + (bottom >> 11);
    b = (b << 2) | (b >> 4);  // 6 bits -> 8 bits.
    g = (g << 1) | (g >> 6);  // 7 bits -> 8 bits.
    r = (r << 2) | (r >> 4);
    *dst_u = RGBToU((uint8)r, (uint8)g, (uint8)b);
    *dst_v = RGBToV((uint8)r, (uint8)g, (uint8)b);
  }
}
479 
// Writes one U and one V value per 2x2 block of ARGB1555 pixels taken from
// the row at src_argb1555 and the row src_stride_argb1555 bytes after it.
// The 5-bit channel fields are summed (not divided) and the wider sums are
// expanded to 8 bits by bit replication. The alpha bit is ignored. When width
// is odd, the last column sums only the two vertically adjacent pixels.
void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b1 = src_argb1555[2] & 0x1f;
    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b3 = next_argb1555[2] & 0x1f;
    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
    uint8 g = (g0 + g1 + g2 + g3);
    uint8 r = (r0 + r1 + r2 + r3);
    b = (b << 1) | (b >> 6);  // 777 -> 888.
    g = (g << 1) | (g >> 6);
    r = (r << 1) | (r >> 6);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    // Red occupies bits 2-6 of the high byte, exactly as for r0. The previous
    // ">> 3" was the RGB565 extraction: it pulled the alpha bit into red and
    // dropped red's lowest bit.
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b = (b0 + b2);  // 555 * 2 = 666.
    uint8 g = (g0 + g2);
    uint8 r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
527 
// Writes one U and one V value per 2x2 block of ARGB4444 pixels taken from
// the row at src_argb4444 and the row src_stride_argb4444 bytes after it.
// The 4-bit channel nibbles are summed (not divided) and the wider sums are
// expanded to 8 bits by bit replication. Alpha is ignored. When width is odd,
// the last column sums only the two vertically adjacent pixels.
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* row0 = src_argb4444;
  const uint8* row1 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Four 4-bit nibbles sum to a 6-bit value per channel.
    unsigned int b = (row0[0] & 0x0f) + (row0[2] & 0x0f) +
                     (row1[0] & 0x0f) + (row1[2] & 0x0f);
    unsigned int g = (row0[0] >> 4) + (row0[2] >> 4) +
                     (row1[0] >> 4) + (row1[2] >> 4);
    unsigned int r = (row0[1] & 0x0f) + (row0[3] & 0x0f) +
                     (row1[1] & 0x0f) + (row1[3] & 0x0f);
    b = (b << 2) | (b >> 4);  // 6 bits -> 8 bits.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    *dst_u++ = RGBToU((uint8)r, (uint8)g, (uint8)b);
    *dst_v++ = RGBToV((uint8)r, (uint8)g, (uint8)b);
    row0 += 4;
    row1 += 4;
  }
  if (width & 1) {
    // Two 4-bit nibbles sum to a 5-bit value per channel.
    unsigned int b = (row0[0] & 0x0f) + (row1[0] & 0x0f);
    unsigned int g = (row0[0] >> 4) + (row1[0] >> 4);
    unsigned int r = (row0[1] & 0x0f) + (row1[1] & 0x0f);
    b = (b << 3) | (b >> 2);  // 5 bits -> 8 bits.
    g = (g << 3) | (g >> 2);
    r = (r << 3) | (r >> 2);
    *dst_u = RGBToU((uint8)r, (uint8)g, (uint8)b);
    *dst_v = RGBToV((uint8)r, (uint8)g, (uint8)b);
  }
}
575 
// Writes one U and one V value for every 4-byte B, G, R, A pixel, with no
// subsampling.
void ARGBToUV444Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    *dst_u++ = RGBToU(red, green, blue);
    *dst_v++ = RGBToV(red, green, blue);
    src_argb += 4;
  }
}
590 
// Writes one U and one V value per horizontal pair of 4-byte B, G, R, A
// pixels, using the truncated average of the pair. When width is odd, the
// final pixel is converted on its own.
void ARGBToUV422Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    const uint8 avg_b = (uint8)((src_argb[0] + src_argb[4]) >> 1);
    const uint8 avg_g = (uint8)((src_argb[1] + src_argb[5]) >> 1);
    const uint8 avg_r = (uint8)((src_argb[2] + src_argb[6]) >> 1);
    *dst_u++ = RGBToU(avg_r, avg_g, avg_b);
    *dst_v++ = RGBToV(avg_r, avg_g, avg_b);
    src_argb += 8;
  }
  if (width & 1) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    *dst_u = RGBToU(red, green, blue);
    *dst_v = RGBToV(red, green, blue);
  }
}
612 
// Writes one U and one V value per horizontal group of four 4-byte B, G, R, A
// pixels, using the truncated average of the group. A trailing group of 3, 2
// or 1 pixels is averaged over the pixels that remain.
void ARGBToUV411Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int i;
  uint8 avg_b;
  uint8 avg_g;
  uint8 avg_r;
  for (i = 0; i < width - 3; i += 4) {
    avg_b = (uint8)((src_argb[0] + src_argb[4] + src_argb[8] +
                     src_argb[12]) >> 2);
    avg_g = (uint8)((src_argb[1] + src_argb[5] + src_argb[9] +
                     src_argb[13]) >> 2);
    avg_r = (uint8)((src_argb[2] + src_argb[6] + src_argb[10] +
                     src_argb[14]) >> 2);
    *dst_u++ = RGBToU(avg_r, avg_g, avg_b);
    *dst_v++ = RGBToV(avg_r, avg_g, avg_b);
    src_argb += 16;
  }
  switch (width & 3) {
    case 3:
      avg_b = (uint8)((src_argb[0] + src_argb[4] + src_argb[8]) / 3);
      avg_g = (uint8)((src_argb[1] + src_argb[5] + src_argb[9]) / 3);
      avg_r = (uint8)((src_argb[2] + src_argb[6] + src_argb[10]) / 3);
      break;
    case 2:
      avg_b = (uint8)((src_argb[0] + src_argb[4]) >> 1);
      avg_g = (uint8)((src_argb[1] + src_argb[5]) >> 1);
      avg_r = (uint8)((src_argb[2] + src_argb[6]) >> 1);
      break;
    case 1:
      avg_b = src_argb[0];
      avg_g = src_argb[1];
      avg_r = src_argb[2];
      break;
    default:
      return;  // No leftover pixels.
  }
  *dst_u = RGBToU(avg_r, avg_g, avg_b);
  *dst_v = RGBToV(avg_r, avg_g, avg_b);
}
646 
// Converts each 4-byte B, G, R, A pixel to gray: the RGBToYJ() value of the
// pixel is written to all three colour bytes and the alpha byte is copied.
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8 gray = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
657 
// Converts a row of 4-byte B, G, R, A pixels to sepia tone in place. Each
// output channel is a weighted sum of the input B, G, R in 7-bit fixed point.
// The alpha byte is left untouched.
void ARGBSepiaRow_C(uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int in_b = dst_argb[0];
    const int in_g = dst_argb[1];
    const int in_r = dst_argb[2];
    const int out_b = (in_b * 17 + in_g * 68 + in_r * 35) >> 7;
    const int out_g = (in_b * 22 + in_g * 88 + in_r * 45) >> 7;
    const int out_r = (in_b * 24 + in_g * 98 + in_r * 50) >> 7;
    // The blue weights sum to 120 (< 128), so out_b cannot exceed 255 and
    // needs no clamp.
    dst_argb[0] = out_b;
    dst_argb[1] = clamp255(out_g);
    dst_argb[2] = clamp255(out_r);
    dst_argb += 4;
  }
}
675 
// Applies a signed 4x4 colour matrix to a row of 4-byte B, G, R, A pixels.
// matrix_argb holds 16 coefficients in 6-bit fixed point: entries 0-3 produce
// the output B, 4-7 the output G, 8-11 the output R and 12-15 the output A,
// each as a weighted sum of the input B, G, R, A. Results are clamped to
// [0, 255].
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
                          const int8* matrix_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int b = src_argb[0];
    const int g = src_argb[1];
    const int r = src_argb[2];
    const int a = src_argb[3];
    int out[4];
    int c;
    // Compute all four sums before storing anything.
    for (c = 0; c < 4; ++c) {
      const int8* coeff = matrix_argb + c * 4;
      out[c] = (b * coeff[0] + g * coeff[1] +
                r * coeff[2] + a * coeff[3]) >> 6;
    }
    dst_argb[0] = Clamp(out[0]);
    dst_argb[1] = Clamp(out[1]);
    dst_argb[2] = Clamp(out[2]);
    dst_argb[3] = Clamp(out[3]);
    src_argb += 4;
    dst_argb += 4;
  }
}
702 
// Remaps every channel of a row of 4-byte pixels in place through a lookup
// table. table_argb is interleaved: the entry for value v of channel c (0-3)
// is table_argb[v * 4 + c].
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int idx0 = dst_argb[0] * 4 + 0;
    const int idx1 = dst_argb[1] * 4 + 1;
    const int idx2 = dst_argb[2] * 4 + 2;
    const int idx3 = dst_argb[3] * 4 + 3;
    dst_argb[0] = table_argb[idx0];
    dst_argb[1] = table_argb[idx1];
    dst_argb[2] = table_argb[idx2];
    dst_argb[3] = table_argb[idx3];
    dst_argb += 4;
  }
}
718 
// Remaps the first three channels of a row of 4-byte pixels in place through
// a lookup table, leaving the fourth byte of each pixel unchanged. table_argb
// is interleaved: the entry for value v of channel c is table_argb[v * 4 + c].
void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int idx0 = dst_argb[0] * 4 + 0;
    const int idx1 = dst_argb[1] * 4 + 1;
    const int idx2 = dst_argb[2] * 4 + 2;
    dst_argb[0] = table_argb[idx0];
    dst_argb[1] = table_argb[idx1];
    dst_argb[2] = table_argb[idx2];
    dst_argb += 4;
  }
}
732 
// Quantizes the first three channels of a row of 4-byte pixels in place. Each
// value v becomes ((v * scale) >> 16) * interval_size + interval_offset; the
// fourth byte of each pixel is left unchanged.
void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
                       int interval_offset, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int level0 = (dst_argb[0] * scale) >> 16;
    const int level1 = (dst_argb[1] * scale) >> 16;
    const int level2 = (dst_argb[2] * scale) >> 16;
    dst_argb[0] = (uint8)(level0 * interval_size + interval_offset);
    dst_argb[1] = (uint8)(level1 * interval_size + interval_offset);
    dst_argb[2] = (uint8)(level2 * interval_size + interval_offset);
    dst_argb += 4;
  }
}
746 
// Scales every channel of a row of 4-byte pixels by the matching byte of
// value (byte 0 of value scales byte 0 of each pixel, and so on). Both the
// pixel byte and the scale byte are widened to 16 bits by repeating the byte
// (x * 0x0101 == x | x << 8), multiplied, and the top 8 bits of the 32-bit
// product are kept.
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
                    uint32 value) {
  const uint32 scale0 = (value & 0xff) * 0x0101u;
  const uint32 scale1 = ((value >> 8) & 0xff) * 0x0101u;
  const uint32 scale2 = ((value >> 16) & 0xff) * 0x0101u;
  const uint32 scale3 = (value >> 24) * 0x0101u;

  int i;
  for (i = 0; i < width; ++i) {
    const uint32 c0 = src_argb[0] * 0x0101u;
    const uint32 c1 = src_argb[1] * 0x0101u;
    const uint32 c2 = src_argb[2] * 0x0101u;
    const uint32 c3 = src_argb[3] * 0x0101u;
    dst_argb[0] = (uint8)((scale0 * c0) >> 24);
    dst_argb[1] = (uint8)((scale1 * c1) >> 24);
    dst_argb[2] = (uint8)((scale2 * c2) >> 24);
    dst_argb[3] = (uint8)((scale3 * c3) >> 24);
    src_argb += 4;
    dst_argb += 4;
  }
}
773 
// Multiplies two rows of 4-byte pixels channel by channel. Each byte of
// src_argb0 is widened to 16 bits by repeating it (x * 0x0101 == x | x << 8),
// multiplied by the matching 8-bit byte of src_argb1, and bits 16-23 of the
// product are stored.
void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
                       uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32 wide0 = src_argb0[0] * 0x0101u;
    const uint32 wide1 = src_argb0[1] * 0x0101u;
    const uint32 wide2 = src_argb0[2] * 0x0101u;
    const uint32 wide3 = src_argb0[3] * 0x0101u;
    const uint32 factor0 = src_argb1[0];
    const uint32 factor1 = src_argb1[1];
    const uint32 factor2 = src_argb1[2];
    const uint32 factor3 = src_argb1[3];
    dst_argb[0] = (uint8)((factor0 * wide0) >> 16);
    dst_argb[1] = (uint8)((factor1 * wide1) >> 16);
    dst_argb[2] = (uint8)((factor2 * wide2) >> 16);
    dst_argb[3] = (uint8)((factor3 * wide3) >> 16);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
800 
801 #define SHADE(f, v) clamp255(v + f)
802 
ARGBAddRow_C(const uint8 * src_argb0,const uint8 * src_argb1,uint8 * dst_argb,int width)803 void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
804                   uint8* dst_argb, int width) {
805   int i;
806   for (i = 0; i < width; ++i) {
807     const int b = src_argb0[0];
808     const int g = src_argb0[1];
809     const int r = src_argb0[2];
810     const int a = src_argb0[3];
811     const int b_add = src_argb1[0];
812     const int g_add = src_argb1[1];
813     const int r_add = src_argb1[2];
814     const int a_add = src_argb1[3];
815     dst_argb[0] = SHADE(b, b_add);
816     dst_argb[1] = SHADE(g, g_add);
817     dst_argb[2] = SHADE(r, r_add);
818     dst_argb[3] = SHADE(a, a_add);
819     src_argb0 += 4;
820     src_argb1 += 4;
821     dst_argb += 4;
822   }
823 }
824 #undef SHADE
825 
826 #define SHADE(f, v) clamp0(f - v)
827 
ARGBSubtractRow_C(const uint8 * src_argb0,const uint8 * src_argb1,uint8 * dst_argb,int width)828 void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
829                        uint8* dst_argb, int width) {
830   int i;
831   for (i = 0; i < width; ++i) {
832     const int b = src_argb0[0];
833     const int g = src_argb0[1];
834     const int r = src_argb0[2];
835     const int a = src_argb0[3];
836     const int b_sub = src_argb1[0];
837     const int g_sub = src_argb1[1];
838     const int r_sub = src_argb1[2];
839     const int a_sub = src_argb1[3];
840     dst_argb[0] = SHADE(b, b_sub);
841     dst_argb[1] = SHADE(g, g_sub);
842     dst_argb[2] = SHADE(r, r_sub);
843     dst_argb[3] = SHADE(a, a_sub);
844     src_argb0 += 4;
845     src_argb1 += 4;
846     dst_argb += 4;
847   }
848 }
849 #undef SHADE
850 
851 // Sobel functions which mimics SSSE3.
SobelXRow_C(const uint8 * src_y0,const uint8 * src_y1,const uint8 * src_y2,uint8 * dst_sobelx,int width)852 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
853                  uint8* dst_sobelx, int width) {
854   int i;
855   for (i = 0; i < width; ++i) {
856     int a = src_y0[i];
857     int b = src_y1[i];
858     int c = src_y2[i];
859     int a_sub = src_y0[i + 2];
860     int b_sub = src_y1[i + 2];
861     int c_sub = src_y2[i + 2];
862     int a_diff = a - a_sub;
863     int b_diff = b - b_sub;
864     int c_diff = c - c_sub;
865     int sobel = Abs(a_diff + b_diff * 2 + c_diff);
866     dst_sobelx[i] = (uint8)(clamp255(sobel));
867   }
868 }
869 
// Vertical Sobel over two rows.  For each x the differences between the two
// rows at columns x, x + 1 and x + 2 are weighted 1, 2, 1, and the absolute
// value is clamped to 255.  Reads two bytes past 'width' in both source rows.
void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
                 uint8* dst_sobely, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int left = src_y0[x + 0] - src_y1[x + 0];
    const int center = src_y0[x + 1] - src_y1[x + 1];
    const int right = src_y0[x + 2] - src_y1[x + 2];
    const int magnitude = Abs(left + center * 2 + right);
    dst_sobely[x] = (uint8)(clamp255(magnitude));
  }
}
887 
// Combines Sobel X and Sobel Y rows into a gray ARGB row.
// B, G and R all receive the sum of both inputs clamped to 255; A is 255.
void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x, dst_argb += 4) {
    const int gray = clamp255(src_sobelx[x] + src_sobely[x]);
    dst_argb[0] = (uint8)(gray);
    dst_argb[1] = (uint8)(gray);
    dst_argb[2] = (uint8)(gray);
    dst_argb[3] = (uint8)(255u);
  }
}
902 
// Combines Sobel X and Sobel Y rows into a single plane: each output byte is
// the sum of the two inputs clamped to 255.
void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                       uint8* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int sum = clamp255(src_sobelx[x] + src_sobely[x]);
    dst_y[x] = (uint8)(sum);
  }
}
913 
// Packs Sobel X and Sobel Y rows into ARGB: B = Sobel Y, R = Sobel X,
// G = their sum clamped to 255, A = 255.
void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                  uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x, dst_argb += 4) {
    const int sobel_x = src_sobelx[x];
    const int sobel_y = src_sobely[x];
    const int combined = clamp255(sobel_x + sobel_y);
    dst_argb[0] = (uint8)(sobel_y);
    dst_argb[1] = (uint8)(combined);
    dst_argb[2] = (uint8)(sobel_x);
    dst_argb[3] = (uint8)(255u);
  }
}
928 
// Expands a row of Y samples to ARGB by copying each Y unchanged into B, G
// and R and setting A to 255.
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 luma = *src_y++;
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
940 
941 // C reference code that mimics the YUV assembly.
942 
943 #define YG 74 /* (int8)(1.164 * 64 + 0.5) */
944 
945 #define UB 127 /* min(63,(int8)(2.018 * 64)) */
946 #define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
947 #define UR 0
948 
949 #define VB 0
950 #define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
951 #define VR 102 /* (int8)(1.596 * 64 + 0.5) */
952 
953 // Bias
954 #define BB UB * 128 + VB * 128
955 #define BG UG * 128 + VG * 128
956 #define BR UR * 128 + VR * 128
957 
YuvPixel(uint8 y,uint8 u,uint8 v,uint8 * b,uint8 * g,uint8 * r)958 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
959                               uint8* b, uint8* g, uint8* r) {
960   int32 y1 = ((int32)(y) - 16) * YG;
961   *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
962   *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
963   *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
964 }
965 
966 #if !defined(LIBYUV_DISABLE_NEON) && \
967     (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
968 // C mimic assembly.
969 // TODO(fbarchard): Remove subsampling from Neon.
I444ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,int width)970 void I444ToARGBRow_C(const uint8* src_y,
971                      const uint8* src_u,
972                      const uint8* src_v,
973                      uint8* rgb_buf,
974                      int width) {
975   int x;
976   for (x = 0; x < width - 1; x += 2) {
977     uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
978     uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
979     YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
980     rgb_buf[3] = 255;
981     YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
982     rgb_buf[7] = 255;
983     src_y += 2;
984     src_u += 2;
985     src_v += 2;
986     rgb_buf += 8;  // Advance 2 pixels.
987   }
988   if (width & 1) {
989     YuvPixel(src_y[0], src_u[0], src_v[0],
990              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
991   }
992 }
993 #else
I444ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,int width)994 void I444ToARGBRow_C(const uint8* src_y,
995                      const uint8* src_u,
996                      const uint8* src_v,
997                      uint8* rgb_buf,
998                      int width) {
999   int x;
1000   for (x = 0; x < width; ++x) {
1001     YuvPixel(src_y[0], src_u[0], src_v[0],
1002              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1003     rgb_buf[3] = 255;
1004     src_y += 1;
1005     src_u += 1;
1006     src_v += 1;
1007     rgb_buf += 4;  // Advance 1 pixel.
1008   }
1009 }
1010 #endif
1011 // Also used for 420
I422ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,int width)1012 void I422ToARGBRow_C(const uint8* src_y,
1013                      const uint8* src_u,
1014                      const uint8* src_v,
1015                      uint8* rgb_buf,
1016                      int width) {
1017   int x;
1018   for (x = 0; x < width - 1; x += 2) {
1019     YuvPixel(src_y[0], src_u[0], src_v[0],
1020              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1021     rgb_buf[3] = 255;
1022     YuvPixel(src_y[1], src_u[0], src_v[0],
1023              rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1024     rgb_buf[7] = 255;
1025     src_y += 2;
1026     src_u += 1;
1027     src_v += 1;
1028     rgb_buf += 8;  // Advance 2 pixels.
1029   }
1030   if (width & 1) {
1031     YuvPixel(src_y[0], src_u[0], src_v[0],
1032              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1033     rgb_buf[3] = 255;
1034   }
1035 }
1036 
// Converts a row of I422 to RGB24: 3 bytes per pixel in B, G, R order.
// Each U/V sample is applied to two consecutive Y samples.
void I422ToRGB24Row_C(const uint8* src_y,
                      const uint8* src_u,
                      const uint8* src_v,
                      uint8* rgb_buf,
                      int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 3;
      YuvPixel(src_y[i], src_u[0], src_v[0], pixel + 0, pixel + 1, pixel + 2);
    }
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 6;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: convert the unpaired last pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  }
}
1058 
// Converts a row of I422 to RAW: 3 bytes per pixel in R, G, B order.
// Each U/V sample is applied to two consecutive Y samples.
void I422ToRAWRow_C(const uint8* src_y,
                    const uint8* src_u,
                    const uint8* src_v,
                    uint8* rgb_buf,
                    int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 3;
      // Blue goes to the last byte of the pixel, red to the first.
      YuvPixel(src_y[i], src_u[0], src_v[0], pixel + 2, pixel + 1, pixel + 0);
    }
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 6;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: convert the unpaired last pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
  }
}
1080 
// Converts a row of I422 to ARGB4444: 16 bits per pixel with B in bits 0-3,
// G in bits 4-7, R in bits 8-11 and alpha bits 12-15 all set.
// Each U/V sample is applied to two consecutive Y samples.  Packed values are
// stored in host byte order with memcpy, so dst_argb4444 needs no particular
// alignment.
void I422ToARGB4444Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_argb4444,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32 two_pixels;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
    // Keep the top 4 bits of every channel.
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    // Pack in unsigned arithmetic; first pixel in the low 16 bits.
    two_pixels = (uint32)b0 | ((uint32)g0 << 4) | ((uint32)r0 << 8) |
        ((uint32)b1 << 16) | ((uint32)g1 << 20) | ((uint32)r1 << 24) |
        0xf000f000;
    memcpy(dst_argb4444, &two_pixels, sizeof(two_pixels));
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16 one_pixel;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    one_pixel = (uint16)(b0 | (g0 << 4) | (r0 << 8) | 0xf000);
    memcpy(dst_argb4444, &one_pixel, sizeof(one_pixel));
  }
}
1118 
// Converts a row of I422 to ARGB1555: 16 bits per pixel with B in bits 0-4,
// G in bits 5-9, R in bits 10-14 and the alpha bit 15 set.
// Each U/V sample is applied to two consecutive Y samples.  Packed values are
// stored in host byte order with memcpy, so dst_argb1555 needs no particular
// alignment.
void I422ToARGB1555Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_argb1555,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32 two_pixels;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
    // Keep the top 5 bits of every channel.
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    // Pack in unsigned arithmetic; first pixel in the low 16 bits.
    two_pixels = (uint32)b0 | ((uint32)g0 << 5) | ((uint32)r0 << 10) |
        ((uint32)b1 << 16) | ((uint32)g1 << 21) | ((uint32)r1 << 26) |
        0x80008000;
    memcpy(dst_argb1555, &two_pixels, sizeof(two_pixels));
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16 one_pixel;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    one_pixel = (uint16)(b0 | (g0 << 5) | (r0 << 10) | 0x8000);
    memcpy(dst_argb1555, &one_pixel, sizeof(one_pixel));
  }
}
1156 
// Converts a row of I422 to RGB565: 16 bits per pixel with B in bits 0-4,
// G in bits 5-10 and R in bits 11-15.
// Each U/V sample is applied to two consecutive Y samples.  Packed values are
// stored in host byte order with memcpy, so dst_rgb565 needs no particular
// alignment.
void I422ToRGB565Row_C(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32 two_pixels;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
    // Keep 5 bits of blue and red, 6 bits of green.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    // Shift as uint32: r1 << 27 does not fit in a signed int once r1 >= 16.
    two_pixels = (uint32)b0 | ((uint32)g0 << 5) | ((uint32)r0 << 11) |
        ((uint32)b1 << 16) | ((uint32)g1 << 21) | ((uint32)r1 << 27);
    memcpy(dst_rgb565, &two_pixels, sizeof(two_pixels));
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16 one_pixel;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    one_pixel = (uint16)(b0 | (g0 << 5) | (r0 << 11));
    memcpy(dst_rgb565, &one_pixel, sizeof(one_pixel));
  }
}
1193 
// Converts a row of I411 to ARGB (bytes B, G, R, A with A = 255).
// Each U/V sample is applied to four consecutive Y samples; a trailing group
// of 1 to 3 pixels reuses the next U/V sample.
void I411ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int remaining = width;
  int i;
  while (remaining >= 4) {
    for (i = 0; i < 4; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      YuvPixel(src_y[i], src_u[0], src_v[0], pixel + 0, pixel + 1, pixel + 2);
      pixel[3] = 255;
    }
    src_y += 4;
    src_u += 1;
    src_v += 1;
    rgb_buf += 16;  // Advance 4 pixels.
    remaining -= 4;
  }
  if (width & 2) {
    // Two leftover pixels.
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      YuvPixel(src_y[i], src_u[0], src_v[0], pixel + 0, pixel + 1, pixel + 2);
      pixel[3] = 255;
    }
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // One leftover pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1234 
// Converts a row of NV12 to ARGB (bytes B, G, R, A with A = 255).
// usrc_v holds interleaved U, V byte pairs; each pair is applied to two
// consecutive Y samples.
void NV12ToARGBRow_C(const uint8* src_y,
                     const uint8* usrc_v,
                     uint8* rgb_buf,
                     int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      YuvPixel(src_y[i], usrc_v[0], usrc_v[1], pixel + 0, pixel + 1, pixel + 2);
      pixel[3] = 255;
    }
    src_y += 2;
    usrc_v += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: convert the unpaired last pixel.
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1257 
// Converts a row of NV21 to ARGB (bytes B, G, R, A with A = 255).
// src_vu holds interleaved V, U byte pairs; each pair is applied to two
// consecutive Y samples.
void NV21ToARGBRow_C(const uint8* src_y,
                     const uint8* src_vu,
                     uint8* rgb_buf,
                     int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      // U is the second byte of the pair, V the first.
      YuvPixel(src_y[i], src_vu[1], src_vu[0], pixel + 0, pixel + 1, pixel + 2);
      pixel[3] = 255;
    }
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: convert the unpaired last pixel.
    YuvPixel(src_y[0], src_vu[1], src_vu[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1282 
// Converts a row of NV12 to RGB565: 16 bits per pixel with B in bits 0-4,
// G in bits 5-10 and R in bits 11-15.
// usrc_v holds interleaved U, V byte pairs; each pair is applied to two
// consecutive Y samples.  Packed values are stored in host byte order with
// memcpy, so dst_rgb565 needs no particular alignment.
void NV12ToRGB565Row_C(const uint8* src_y,
                       const uint8* usrc_v,
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32 two_pixels;
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
    YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
    // Keep 5 bits of blue and red, 6 bits of green.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    // Shift as uint32: r1 << 27 does not fit in a signed int once r1 >= 16.
    two_pixels = (uint32)b0 | ((uint32)g0 << 5) | ((uint32)r0 << 11) |
        ((uint32)b1 << 16) | ((uint32)g1 << 21) | ((uint32)r1 << 27);
    memcpy(dst_rgb565, &two_pixels, sizeof(two_pixels));
    src_y += 2;
    usrc_v += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16 one_pixel;
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    one_pixel = (uint16)(b0 | (g0 << 5) | (r0 << 11));
    memcpy(dst_rgb565, &one_pixel, sizeof(one_pixel));
  }
}
1317 
// Converts a row of NV21 to RGB565: 16 bits per pixel with B in bits 0-4,
// G in bits 5-10 and R in bits 11-15.
// vsrc_u holds interleaved V, U byte pairs; each pair is applied to two
// consecutive Y samples.  Packed values are stored in host byte order with
// memcpy, so dst_rgb565 needs no particular alignment.
void NV21ToRGB565Row_C(const uint8* src_y,
                       const uint8* vsrc_u,
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32 two_pixels;
    // U is the second byte of the pair, V the first.
    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
    // Keep 5 bits of blue and red, 6 bits of green.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    // Shift as uint32: r1 << 27 does not fit in a signed int once r1 >= 16.
    two_pixels = (uint32)b0 | ((uint32)g0 << 5) | ((uint32)r0 << 11) |
        ((uint32)b1 << 16) | ((uint32)g1 << 21) | ((uint32)r1 << 27);
    memcpy(dst_rgb565, &two_pixels, sizeof(two_pixels));
    src_y += 2;
    vsrc_u += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16 one_pixel;
    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    one_pixel = (uint16)(b0 | (g0 << 5) | (r0 << 11));
    memcpy(dst_rgb565, &one_pixel, sizeof(one_pixel));
  }
}
1352 
// Converts a row of YUY2 to ARGB (bytes B, G, R, A with A = 255).
// Every 4 source bytes are read as Y0, U, Y1, V and yield two pixels that
// share U and V.
void YUY2ToARGBRow_C(const uint8* src_yuy2,
                     uint8* rgb_buf,
                     int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      YuvPixel(src_yuy2[i * 2], src_yuy2[1], src_yuy2[3],
               pixel + 0, pixel + 1, pixel + 2);
      pixel[3] = 255;
    }
    src_yuy2 += 4;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: only the first Y of the last group is converted.
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1373 
// Converts a row of UYVY to ARGB (bytes B, G, R, A with A = 255).
// Every 4 source bytes are read as U, Y0, V, Y1 and yield two pixels that
// share U and V.
void UYVYToARGBRow_C(const uint8* src_uyvy,
                     uint8* rgb_buf,
                     int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      YuvPixel(src_uyvy[1 + i * 2], src_uyvy[0], src_uyvy[2],
               pixel + 0, pixel + 1, pixel + 2);
      pixel[3] = 255;
    }
    src_uyvy += 4;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: only the first Y of the last group is converted.
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1394 
// Converts a row of I422 to BGRA: each pixel is written as A, R, G, B bytes
// with A set to 255.  Each U/V sample is applied to two consecutive Y samples.
void I422ToBGRARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      YuvPixel(src_y[i], src_u[0], src_v[0], pixel + 3, pixel + 2, pixel + 1);
      pixel[0] = 255;
    }
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: convert the unpaired last pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
    rgb_buf[0] = 255;
  }
}
1419 
// Converts a row of I422 to ABGR: each pixel is written as R, G, B, A bytes
// with A set to 255.  Each U/V sample is applied to two consecutive Y samples.
void I422ToABGRRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      YuvPixel(src_y[i], src_u[0], src_v[0], pixel + 2, pixel + 1, pixel + 0);
      pixel[3] = 255;
    }
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: convert the unpaired last pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    rgb_buf[3] = 255;
  }
}
1444 
// Converts a row of I422 to RGBA: each pixel is written as A, B, G, R bytes
// with A set to 255.  Each U/V sample is applied to two consecutive Y samples.
void I422ToRGBARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      YuvPixel(src_y[i], src_u[0], src_v[0], pixel + 1, pixel + 2, pixel + 3);
      pixel[0] = 255;
    }
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: convert the unpaired last pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
    rgb_buf[0] = 255;
  }
}
1469 
// Converts a row of Y samples to ARGB (bytes B, G, R, A with A = 255) by
// running each Y through YuvPixel with U and V both fixed at 128.
void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
  int remaining = width;
  while (remaining >= 2) {
    int i;
    for (i = 0; i < 2; ++i) {
      uint8* pixel = rgb_buf + i * 4;
      YuvPixel(src_y[i], 128, 128, pixel + 0, pixel + 1, pixel + 2);
      pixel[3] = 255;
    }
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: convert the unpaired last pixel.
    YuvPixel(src_y[0], 128, 128,
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
  }
}
1488 
// Copies 'width' bytes from src to dst in reverse order.
void MirrorRow_C(const uint8* src, uint8* dst, int width) {
  const uint8* from = src + (width - 1);  // Last source byte.
  uint8* to = dst;
  int remaining = width;
  while (remaining >= 2) {
    to[0] = from[0];
    to[1] = from[-1];
    from -= 2;
    to += 2;
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: the first source byte lands in the last destination slot.
    dst[width - 1] = from[0];
  }
}
1501 
// Reverses a row of interleaved UV pairs while splitting it: the first byte
// of each pair goes to dst_u and the second to dst_v.  'width' counts pairs.
void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  uint8* out_u = dst_u;
  uint8* out_v = dst_v;
  int remaining = width;
  src_uv += (width - 1) << 1;  // Start at the last pair.
  while (remaining >= 2) {
    out_u[0] = src_uv[0];
    out_u[1] = src_uv[-2];
    out_v[0] = src_uv[1];
    out_v[1] = src_uv[-1];
    src_uv -= 4;
    out_u += 2;
    out_v += 2;
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: the first source pair lands in the last destination slot.
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}
1517 
// Reverses the order of 'width' 32 bit pixels from src into dst.
// Both buffers are accessed through uint32 pointers.
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
  const uint32* from = (const uint32*)(src) + (width - 1);  // Last pixel.
  uint32* const out = (uint32*)(dst);
  uint32* to = out;
  int remaining = width;
  while (remaining >= 2) {
    to[0] = from[0];
    to[1] = from[-1];
    from -= 2;
    to += 2;
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: the first source pixel lands in the last destination slot.
    out[width - 1] = from[0];
  }
}
1532 
// De-interleaves a row of UV pairs: the first byte of each pair goes to
// dst_u and the second to dst_v.  'width' counts pairs.
void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  uint8* out_u = dst_u;
  uint8* out_v = dst_v;
  int remaining = width;
  while (remaining >= 2) {
    out_u[0] = src_uv[0];
    out_u[1] = src_uv[2];
    out_v[0] = src_uv[1];
    out_v[1] = src_uv[3];
    src_uv += 4;
    out_u += 2;
    out_v += 2;
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: one trailing pair.
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}
1547 
// Interleaves a row of U and a row of V into UV pairs, U first.
// 'width' counts pairs.
void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                  int width) {
  const uint8* in_u = src_u;
  const uint8* in_v = src_v;
  int remaining = width;
  while (remaining >= 2) {
    dst_uv[0] = in_u[0];
    dst_uv[1] = in_v[0];
    dst_uv[2] = in_u[1];
    dst_uv[3] = in_v[1];
    dst_uv += 4;
    in_u += 2;
    in_v += 2;
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: one trailing pair.
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}
1563 
// Copies 'count' bytes from src to dst with memcpy.
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  const size_t num_bytes = count;  // Same conversion memcpy's size parameter applies.
  memcpy(dst, src, num_bytes);
}
1567 
// Copies 'count' 16 bit elements from src to dst with memcpy.
// The byte count is computed in size_t so a large 'count' cannot overflow
// int arithmetic.
void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
  memcpy(dst, src, (size_t)count * sizeof(*src));
}
1571 
// Fills 'count' bytes at dst with the low 8 bits of v8.
void SetRow_C(uint8* dst, uint32 v8, int count) {
#ifdef _MSC_VER
  // Explicit byte loop; VC will generate rep stosb.
  int remaining = count;
  while (remaining > 0) {
    *dst++ = v8;
    --remaining;
  }
#else
  memset(dst, v8, count);
#endif
}
1583 
// Fills a rectangle of 32 bit pixels with v32: 'height' rows of 'width'
// pixels each, with consecutive rows dst_stride bytes apart.
void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
                 int dst_stride, int height) {
  int rows_left;
  for (rows_left = height; rows_left > 0; --rows_left) {
    uint32* pixel = (uint32*)(dst);
    int cols_left = width;
    while (cols_left > 0) {
      *pixel++ = v32;
      --cols_left;
    }
    dst += dst_stride;
  }
}
1596 
// Filter 2 rows of YUY2 UV's (422) into U and V (420).
// Each output U and V is the rounded average of the sample in this row and
// the sample src_stride_yuy2 bytes further on.  One U and one V are written
// for every two pixels of 'width'.
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
                   uint8* dst_u, uint8* dst_v, int width) {
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 2) {
    const int u_sum = src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1];
    const int v_sum = src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3];
    *dst_u++ = (u_sum + 1) >> 1;
    *dst_v++ = (v_sum + 1) >> 1;
    src_yuy2 += 4;
  }
}
1610 
// Copy row of YUY2 UV's (422) into U and V (422).
// Takes bytes 1 and 3 of every 4 byte group as U and V; one U and one V are
// written for every two pixels of 'width'.
void YUY2ToUV422Row_C(const uint8* src_yuy2,
                      uint8* dst_u, uint8* dst_v, int width) {
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 2) {
    *dst_u++ = src_yuy2[1];
    *dst_v++ = src_yuy2[3];
    src_yuy2 += 4;
  }
}
1624 
// Copy row of YUY2 Y's (422) into Y (420/422).
// The Y samples are the even bytes of the source row.
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
  uint8* out = dst_y;
  int remaining = width;
  while (remaining >= 2) {
    out[0] = src_yuy2[0];
    out[1] = src_yuy2[2];
    src_yuy2 += 4;
    out += 2;
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: one trailing Y.
    dst_y[width - 1] = src_yuy2[0];
  }
}
1638 
// Filter 2 rows of UYVY UV's (422) into U and V (420).
// Each output U and V is the rounded average of the sample in this row and
// the sample src_stride_uyvy bytes further on.  One U and one V are written
// for every two pixels of 'width'.
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
                   uint8* dst_u, uint8* dst_v, int width) {
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 2) {
    const int u_sum = src_uyvy[0] + src_uyvy[src_stride_uyvy + 0];
    const int v_sum = src_uyvy[2] + src_uyvy[src_stride_uyvy + 2];
    *dst_u++ = (u_sum + 1) >> 1;
    *dst_v++ = (v_sum + 1) >> 1;
    src_uyvy += 4;
  }
}
1652 
// Copy row of UYVY UV's (422) into U and V (422).
// Takes bytes 0 and 2 of every 4 byte group as U and V; one U and one V are
// written for every two pixels of 'width'.
void UYVYToUV422Row_C(const uint8* src_uyvy,
                      uint8* dst_u, uint8* dst_v, int width) {
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 2) {
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;
  }
}
1666 
// Copy row of UYVY Y's (422) into Y (420/422).
// The Y samples are the odd bytes of the source row.
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
  uint8* out = dst_y;
  int remaining = width;
  while (remaining >= 2) {
    out[0] = src_uyvy[1];
    out[1] = src_uyvy[3];
    src_uyvy += 4;
    out += 2;
    remaining -= 2;
  }
  if (width & 1) {
    // Odd width: one trailing Y.
    dst_y[width - 1] = src_uyvy[1];
  }
}
1680 
// Blend one color channel of a foreground pixel over a background pixel:
//   f + ((256 - a) * b >> 8)
// |f| is expected to be premultiplied by the alpha |a|.  The sum is saturated
// at 255: with inputs that are not premultiplied it can reach 510, and
// storing that unclamped into a byte would wrap around to a dark value.
static __inline uint8 BlendChannelSaturate(uint32 f, uint32 b, uint32 a) {
  const uint32 sum = (((256u - a) * b) >> 8) + f;
  return (uint8)(sum > 255u ? 255u : sum);
}

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// This code mimics the SSSE3 version for better testability.
// Only the alpha of src_argb0 is used; the destination alpha is always 255.
// Nothing is written when |width| is zero or negative.
void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
                    uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Read the whole pixel pair before writing so in-place blending works.
    const uint32 fb = src_argb0[0];
    const uint32 fg = src_argb0[1];
    const uint32 fr = src_argb0[2];
    const uint32 a = src_argb0[3];
    const uint32 bb = src_argb1[0];
    const uint32 bg = src_argb1[1];
    const uint32 br = src_argb1[2];
    dst_argb[0] = BlendChannelSaturate(fb, bb, a);
    dst_argb[1] = BlendChannelSaturate(fg, bg, a);
    dst_argb[2] = BlendChannelSaturate(fr, br, a);
    dst_argb[3] = 255u;
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
// Widen both 8 bit operands to 16 bits by byte replication (v * 0x0101),
// multiply them and keep the top 8 bits of the 32 bit product.
#define ATTENUATE(f, a) ((((a) * 0x0101u) * ((f) * 0x0101u)) >> 24)

// Multiply source RGB by alpha and store to destination.
// This code mimics the SSSE3 version for better testability.
// Alpha itself is copied through unchanged.
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int remaining = width;
  // Two pixels per pass; each pixel is fully read before it is written.
  while (remaining > 1) {
    int offset;
    for (offset = 0; offset < 8; offset += 4) {
      const uint32 blue = src_argb[offset + 0];
      const uint32 green = src_argb[offset + 1];
      const uint32 red = src_argb[offset + 2];
      const uint32 alpha = src_argb[offset + 3];
      dst_argb[offset + 0] = ATTENUATE(blue, alpha);
      dst_argb[offset + 1] = ATTENUATE(green, alpha);
      dst_argb[offset + 2] = ATTENUATE(red, alpha);
      dst_argb[offset + 3] = alpha;
    }
    src_argb += 8;
    dst_argb += 8;
    remaining -= 2;
  }

  // Trailing pixel of an odd width row.
  if ((remaining & 1) != 0) {
    const uint32 blue = src_argb[0];
    const uint32 green = src_argb[1];
    const uint32 red = src_argb[2];
    const uint32 alpha = src_argb[3];
    dst_argb[0] = ATTENUATE(blue, alpha);
    dst_argb[1] = ATTENUATE(green, alpha);
    dst_argb[2] = ATTENUATE(red, alpha);
    dst_argb[3] = alpha;
  }
}
#undef ATTENUATE
1772 
// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values. ie 125
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
// The table is indexed by alpha.  T(a) yields 0x0100 in the upper 16 bits
// and 0x10000 / a in the lower 16 bits.  Three entries are written by hand:
//   [0]    0x01000000: lower half is 0, so no division by zero is needed.
//   [1]    0x0100ffff: 0x10000 / 1 does not fit in 16 bits, so 0xffff is used.
//   [255]  0x01000100: lower half is 0x0100, where T(0xff) would give 0x0101.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32 fixed_invtbl8[256] = {
  0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
  T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
  T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
  T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
  T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
  T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
  T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
  T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
  T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
  T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
  T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
  T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
  T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
  T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
  T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
  T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
  T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
  T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
  T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
  T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
  T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
  T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
  T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
  T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
  T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
  T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
  T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
  T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
  T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
  T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
  T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
  T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
#undef T
1814 
ARGBUnattenuateRow_C(const uint8 * src_argb,uint8 * dst_argb,int width)1815 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1816   int i;
1817   for (i = 0; i < width; ++i) {
1818     uint32 b = src_argb[0];
1819     uint32 g = src_argb[1];
1820     uint32 r = src_argb[2];
1821     const uint32 a = src_argb[3];
1822     const uint32 ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
1823     b = (b * ia) >> 8;
1824     g = (g * ia) >> 8;
1825     r = (r * ia) >> 8;
1826     // Clamping should not be necessary but is free in assembly.
1827     dst_argb[0] = clamp255(b);
1828     dst_argb[1] = clamp255(g);
1829     dst_argb[2] = clamp255(r);
1830     dst_argb[3] = a;
1831     src_argb += 4;
1832     dst_argb += 4;
1833   }
1834 }
1835 
// Build one row of a per-channel cumulative sum.
// For each of the 4 channels, cumsum[x] is the running sum of |row| from
// pixel 0 through pixel x, plus previous_cumsum[x].  All three buffers are
// interleaved with 4 values per pixel.
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
                               const int32* previous_cumsum, int width) {
  int32 running[4] = {0, 0, 0, 0};
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    int c;
    for (c = 0; c < 4; ++c) {
      running[c] += row[c];
      cumsum[c] = running[c] + previous_cumsum[c];
    }
    row += 4;
    cumsum += 4;
    previous_cumsum += 4;
  }
}
1851 
// Turn cumulative sums into box averages.
// For each output pixel and channel the box sum is
//   bl[w + c] + tl[c] - bl[c] - tl[w + c]
// where |w| is the offset, in int32 elements, between the two sides of the
// box.  The sum is multiplied by 1 / area and truncated to a byte.
void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
                                 int w, int area, uint8* dst, int count) {
  const float inv_area = 1.0f / area;
  int remaining;
  for (remaining = count; remaining > 0; --remaining) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8)((bl[w + c] + tl[c] - bl[c] - tl[w + c]) * inv_area);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
1866 
// Copy pixels from rotated source to destination row with a slope.
// |uv_dudv| holds 4 floats: the starting source coordinate (u, v) followed by
// the per-pixel step (du, dv).  For every destination pixel the coordinate is
// truncated to integers x and y and the 4 byte pixel at
// src_argb + y * src_argb_stride + x * 4 is copied.  No bounds checking is
// done on the computed source position.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
                     uint8* dst_argb, const float* uv_dudv, int width) {
  int i;
  // Render a row of pixels from source into a buffer.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    // Copy through a temporary with memcpy instead of dereferencing uint32
    // pointers cast from byte buffers: the pixel address need not be 4 byte
    // aligned (stride and buffer alignment are up to the caller) and the
    // cast violates the aliasing rules.
    uint32 pixel;
    memcpy(&pixel, src_argb + y * src_argb_stride + x * 4, sizeof(pixel));
    memcpy(dst_argb, &pixel, sizeof(pixel));
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}
1887 
// Average two rows into one, for conversions such as I422ToI420.
// The second row starts |src_uv_stride| bytes after the first; each output
// byte is the mean of the two source bytes, rounding half up.
void HalfRow_C(const uint8* src_uv, int src_uv_stride,
               uint8* dst_uv, int pix) {
  int remaining;
  for (remaining = pix; remaining > 0; --remaining) {
    *dst_uv++ = (uint8)((src_uv[0] + src_uv[src_uv_stride] + 1) >> 1);
    ++src_uv;
  }
}
1896 
// 16 bit variant of HalfRow_C: average two rows of uint16 samples, rounding
// half up.  |src_uv_stride| is counted in uint16 elements.
void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
                  uint16* dst_uv, int pix) {
  int remaining;
  for (remaining = pix; remaining > 0; --remaining) {
    *dst_uv++ = (uint16)((src_uv[0] + src_uv[src_uv_stride] + 1) >> 1);
    ++src_uv;
  }
}
1904 
// C version 2x2 -> 2x1.
// Blend the row at |src_ptr| with the row |src_stride| bytes after it.
// |source_y_fraction| is the weight of the second row out of 256:
//   0   -> the first row is copied with memcpy,
//   128 -> the two rows are averaged by HalfRow_C (which rounds),
//   else   (row0 * (256 - f) + row1 * f) >> 8, truncating.
void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      int width, int source_y_fraction) {
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  const uint8* next_row = src_ptr + src_stride;
  int remaining = width;

  switch (source_y_fraction) {
    case 0:
      memcpy(dst_ptr, src_ptr, width);
      return;
    case 128:
      HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
      return;
    default:
      break;
  }

  // Two samples per pass, then a single trailing sample for odd widths.
  while (remaining > 1) {
    dst_ptr[0] = (src_ptr[0] * weight0 + next_row[0] * weight1) >> 8;
    dst_ptr[1] = (src_ptr[1] * weight0 + next_row[1] * weight1) >> 8;
    src_ptr += 2;
    next_row += 2;
    dst_ptr += 2;
    remaining -= 2;
  }
  if ((remaining & 1) != 0) {
    dst_ptr[0] = (src_ptr[0] * weight0 + next_row[0] * weight1) >> 8;
  }
}
1932 
// 16 bit variant of InterpolateRow_C.  |src_stride| is counted in uint16
// elements and |source_y_fraction| is the weight of the second row out of
// 256:
//   0   -> the first row is copied with memcpy (width * 2 bytes),
//   128 -> the two rows are averaged by HalfRow_16_C (which rounds),
//   else   (row0 * (256 - f) + row1 * f) >> 8, truncating.
void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
                         ptrdiff_t src_stride,
                         int width, int source_y_fraction) {
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  const uint16* next_row = src_ptr + src_stride;
  int remaining = width;

  switch (source_y_fraction) {
    case 0:
      memcpy(dst_ptr, src_ptr, width * 2);
      return;
    case 128:
      HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);
      return;
    default:
      break;
  }

  // Two samples per pass, then a single trailing sample for odd widths.
  while (remaining > 1) {
    dst_ptr[0] = (src_ptr[0] * weight0 + next_row[0] * weight1) >> 8;
    dst_ptr[1] = (src_ptr[1] * weight0 + next_row[1] * weight1) >> 8;
    src_ptr += 2;
    next_row += 2;
    dst_ptr += 2;
    remaining -= 2;
  }
  if ((remaining & 1) != 0) {
    dst_ptr[0] = (src_ptr[0] * weight0 + next_row[0] * weight1) >> 8;
  }
}
1959 
// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
// The low byte of |selector| is the byte offset used for even output
// positions and the next byte is the offset used for odd positions.  Both
// offsets are applied to the start of an 8 byte (2 pixel) group.
void ARGBToBayerRow_C(const uint8* src_argb,
                      uint8* dst_bayer, uint32 selector, int pix) {
  const int even_offset = selector & 0xff;
  const int odd_offset = (selector >> 8) & 0xff;
  int remaining = pix;
  // Copy a row of Bayer, two outputs per 8 source bytes.
  while (remaining > 1) {
    *dst_bayer++ = src_argb[even_offset];
    *dst_bayer++ = src_argb[odd_offset];
    src_argb += 8;
    remaining -= 2;
  }
  // Odd count: only the even-position sample remains.
  if ((remaining & 1) != 0) {
    dst_bayer[0] = src_argb[even_offset];
  }
}
1977 
// Select G channel from ARGB.  e.g.  GGGGGGGG
// Byte 1 of every 4 byte pixel is copied to the output.  |selector| is not
// used by this function.
void ARGBToBayerGGRow_C(const uint8* src_argb,
                        uint8* dst_bayer, uint32 selector, int pix) {
  int remaining = pix;
  // Copy a row of G, two pixels per pass.
  while (remaining > 1) {
    *dst_bayer++ = src_argb[1];
    *dst_bayer++ = src_argb[5];
    src_argb += 8;
    remaining -= 2;
  }
  if ((remaining & 1) != 0) {
    dst_bayer[0] = src_argb[1];
  }
}
1993 
// Use first 4 shuffler values to reorder ARGB channels.
// Output byte k of every pixel is source byte shuffler[k] of that pixel.
void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
                      const uint8* shuffler, int pix) {
  const int from0 = shuffler[0];
  const int from1 = shuffler[1];
  const int from2 = shuffler[2];
  const int from3 = shuffler[3];
  int remaining;
  // Shuffle a row of ARGB.
  for (remaining = pix; remaining > 0; --remaining) {
    // Gather the whole pixel first to support in-place conversion.
    uint8 picked[4];
    picked[0] = src_argb[from0];
    picked[1] = src_argb[from1];
    picked[2] = src_argb[from2];
    picked[3] = src_argb[from3];
    dst_argb[0] = picked[0];
    dst_argb[1] = picked[1];
    dst_argb[2] = picked[2];
    dst_argb[3] = picked[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
2017 
// Pack planar Y, U and V rows into YUY2 (Y0 U Y1 V).
// Each U and V sample is shared by two Y samples.  For an odd |width| the
// last macropixel is still written in full, with the final Y duplicated.
void I422ToYUY2Row_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int remaining = width;
  while (remaining > 1) {
    *dst_frame++ = src_y[0];
    *dst_frame++ = *src_u++;
    *dst_frame++ = src_y[1];
    *dst_frame++ = *src_v++;
    src_y += 2;
    remaining -= 2;
  }
  if ((remaining & 1) != 0) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[0];  // duplicate last y
    dst_frame[3] = src_v[0];
  }
}
2040 
// Pack planar Y, U and V rows into UYVY (U Y0 V Y1).
// Each U and V sample is shared by two Y samples.  For an odd |width| the
// last macropixel is still written in full, with the final Y duplicated.
void I422ToUYVYRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int remaining = width;
  while (remaining > 1) {
    *dst_frame++ = *src_u++;
    *dst_frame++ = src_y[0];
    *dst_frame++ = *src_v++;
    *dst_frame++ = src_y[1];
    src_y += 2;
    remaining -= 2;
  }
  if ((remaining & 1) != 0) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[0];  // duplicate last y
  }
}
2063 
2064 #if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
2065 // row_win.cc has asm version, but GCC uses 2 step wrapper.
2066 #if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
// Two step wrapper: convert the Y, U and V rows into a temporary ARGB row
// with I422ToARGBRow_SSSE3, then convert that row into |rgb_buf| with
// ARGBToRGB565Row_SSE2.  The temporary row is width * 4 bytes.
void I422ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_u,
                           const uint8* src_v,
                           uint8* rgb_buf,
                           int width) {
  // Allocate a row of ARGB.
  // NOTE(review): align_buffer_64 is defined outside this file; presumably it
  // declares |row| as a 64 byte aligned scratch buffer - confirm in row.h.
  align_buffer_64(row, width * 4);
  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
  ARGBToRGB565Row_SSE2(row, rgb_buf, width);
  free_aligned_buffer_64(row);
}
2078 #endif  // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
2079 
2080 #if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
// Two step wrapper: convert the Y, U and V rows into a temporary ARGB row
// with I422ToARGBRow_SSSE3, then convert that row into |rgb_buf| with
// ARGBToARGB1555Row_SSE2.  The temporary row is width * 4 bytes.
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* rgb_buf,
                             int width) {
  // Allocate a row of ARGB.
  // NOTE(review): align_buffer_64 is defined outside this file; presumably it
  // declares |row| as a 64 byte aligned scratch buffer - confirm in row.h.
  align_buffer_64(row, width * 4);
  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
  ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
  free_aligned_buffer_64(row);
}
2092 
// Two step wrapper: convert the Y, U and V rows into a temporary ARGB row
// with I422ToARGBRow_SSSE3, then convert that row into |rgb_buf| with
// ARGBToARGB4444Row_SSE2.  The temporary row is width * 4 bytes.
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* rgb_buf,
                             int width) {
  // Allocate a row of ARGB.
  // NOTE(review): align_buffer_64 is defined outside this file; presumably it
  // declares |row| as a 64 byte aligned scratch buffer - confirm in row.h.
  align_buffer_64(row, width * 4);
  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
  ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
  free_aligned_buffer_64(row);
}
2104 
// Two step wrapper: convert the Y row and the interleaved UV row into a
// temporary ARGB row with NV12ToARGBRow_SSSE3, then convert that row into
// |dst_rgb565| with ARGBToRGB565Row_SSE2.  The temporary row is width * 4
// bytes.
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_uv,
                           uint8* dst_rgb565,
                           int width) {
  // Allocate a row of ARGB.
  // NOTE(review): align_buffer_64 is defined outside this file; presumably it
  // declares |row| as a 64 byte aligned scratch buffer - confirm in row.h.
  align_buffer_64(row, width * 4);
  NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
  free_aligned_buffer_64(row);
}
2115 
// Two step wrapper: convert the Y row and the interleaved VU row into a
// temporary ARGB row with NV21ToARGBRow_SSSE3, then convert that row into
// |dst_rgb565| with ARGBToRGB565Row_SSE2.  The temporary row is width * 4
// bytes.
void NV21ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_vu,
                           uint8* dst_rgb565,
                           int width) {
  // Allocate a row of ARGB.
  // NOTE(review): align_buffer_64 is defined outside this file; presumably it
  // declares |row| as a 64 byte aligned scratch buffer - confirm in row.h.
  align_buffer_64(row, width * 4);
  NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
  free_aligned_buffer_64(row);
}
2126 
// Three step wrapper: split the packed YUY2 row into temporary U, V and Y
// rows (YUY2ToUV422Row_SSE2, YUY2ToYRow_SSE2), then convert those rows to
// ARGB with I422ToARGBRow_SSSE3.
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
                         uint8* dst_argb,
                         int width) {
  // Allocate rows of YUV in one buffer.  With W = width rounded up to a
  // multiple of 64, the buffer is 2 * W bytes: Y occupies the first W bytes,
  // U the next W / 2 and V the last W / 2.
  // NOTE(review): align_buffer_64 is defined outside this file; presumably it
  // declares |row_y| as a 64 byte aligned scratch buffer - confirm in row.h.
  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
  uint8* row_u = row_y + ((width + 63) & ~63);
  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
  YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
  YUY2ToYRow_SSE2(src_yuy2, row_y, width);
  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}
2139 
// Same sequence as YUY2ToARGBRow_SSSE3 but built from the *_Unaligned_*
// row functions: split the packed YUY2 row into temporary U, V and Y rows,
// then convert those rows to ARGB.
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
                                   uint8* dst_argb,
                                   int width) {
  // Allocate rows of YUV in one buffer.  With W = width rounded up to a
  // multiple of 64, the buffer is 2 * W bytes: Y occupies the first W bytes,
  // U the next W / 2 and V the last W / 2.
  // NOTE(review): align_buffer_64 is defined outside this file; presumably it
  // declares |row_y| as a 64 byte aligned scratch buffer - confirm in row.h.
  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
  uint8* row_u = row_y + ((width + 63) & ~63);
  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
  YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
  YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}
2152 
// Three step wrapper: split the packed UYVY row into temporary U, V and Y
// rows (UYVYToUV422Row_SSE2, UYVYToYRow_SSE2), then convert those rows to
// ARGB with I422ToARGBRow_SSSE3.
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
                         uint8* dst_argb,
                         int width) {
  // Allocate rows of YUV in one buffer.  With W = width rounded up to a
  // multiple of 64, the buffer is 2 * W bytes: Y occupies the first W bytes,
  // U the next W / 2 and V the last W / 2.
  // NOTE(review): align_buffer_64 is defined outside this file; presumably it
  // declares |row_y| as a 64 byte aligned scratch buffer - confirm in row.h.
  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
  uint8* row_u = row_y + ((width + 63) & ~63);
  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
  UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
  UYVYToYRow_SSE2(src_uyvy, row_y, width);
  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}
2165 
// Same sequence as UYVYToARGBRow_SSSE3 but built from the *_Unaligned_*
// row functions: split the packed UYVY row into temporary U, V and Y rows,
// then convert those rows to ARGB.
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
                                   uint8* dst_argb,
                                   int width) {
  // Allocate rows of YUV in one buffer.  With W = width rounded up to a
  // multiple of 64, the buffer is 2 * W bytes: Y occupies the first W bytes,
  // U the next W / 2 and V the last W / 2.
  // NOTE(review): align_buffer_64 is defined outside this file; presumably it
  // declares |row_y| as a 64 byte aligned scratch buffer - confirm in row.h.
  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
  uint8* row_u = row_y + ((width + 63) & ~63);
  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
  UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
  UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}
2178 
2179 #endif  // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2180 #endif  // !defined(LIBYUV_DISABLE_X86)
2181 
// Apply a cubic polynomial to every channel of each ARGB pixel.
// |poly| holds 16 floats grouped by power: poly[0..3] are the constant terms
// for bytes 0..3 of the pixel, poly[4..7] the linear, poly[8..11] the
// squared and poly[12..15] the cubed coefficients.  Each result is truncated
// to an integer and passed through Clamp() before being stored.
void ARGBPolynomialRow_C(const uint8* src_argb,
                         uint8* dst_argb, const float* poly,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    float result[4];
    int c;
    // Evaluate all four channels before storing any of them.
    for (c = 0; c < 4; ++c) {
      const float v = (float)(src_argb[c]);
      const float v2 = v * v;
      const float v3 = v2 * v;
      float sum = poly[c] + poly[c + 4] * v;
      sum += poly[c + 8] * v2;
      sum += poly[c + 12] * v3;
      result[c] = sum;
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = Clamp((int32)(result[c]));
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
2220 
// Remap B, G and R through a color table selected per pixel by luminance.
// |lumacoeff| packs three 8 bit weights (byte 0 for B, byte 1 for G, byte 2
// for R).  The weighted sum of a pixel, masked with 0x7F00, is the byte
// offset of a 256 entry table inside |luma|; each color byte is then looked
// up in that table.  Alpha is copied through unchanged.
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
                             const uint8* luma, uint32 lumacoeff) {
  const uint32 weight_b = lumacoeff & 0xff;
  const uint32 weight_g = (lumacoeff >> 8) & 0xff;
  const uint32 weight_r = (lumacoeff >> 16) & 0xff;
  int remaining = width;

  // Two pixels per pass; the table of a pixel is chosen before it is written.
  while (remaining > 1) {
    int offset;
    for (offset = 0; offset < 8; offset += 4) {
      const uint8* in = src_argb + offset;
      uint8* out = dst_argb + offset;
      // Luminance in rows, color values in columns.
      const uint8* table = ((in[0] * weight_b + in[1] * weight_g +
                             in[2] * weight_r) & 0x7F00u) + luma;
      out[0] = table[in[0]];
      out[1] = table[in[1]];
      out[2] = table[in[2]];
      out[3] = in[3];
    }
    src_argb += 8;
    dst_argb += 8;
    remaining -= 2;
  }
  // Trailing pixel of an odd width row.
  if ((remaining & 1) != 0) {
    const uint8* table = ((src_argb[0] * weight_b + src_argb[1] * weight_g +
                           src_argb[2] * weight_r) & 0x7F00u) + luma;
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
  }
}
2256 
// Copy only the alpha byte (byte 3 of every 4 byte pixel) from |src| to
// |dst|, leaving the other three bytes of each destination pixel untouched.
void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int remaining = width;
  // Two pixels per pass.
  while (remaining > 1) {
    dst[3] = src[3];
    dst[7] = src[7];
    src += 8;
    dst += 8;
    remaining -= 2;
  }
  if ((remaining & 1) != 0) {
    dst[3] = src[3];
  }
}
2269 
// Store a row of single byte samples into the alpha byte (byte 3) of
// consecutive 4 byte pixels in |dst|, leaving the other three bytes of each
// destination pixel untouched.
void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int remaining = width;
  // Two pixels per pass.
  while (remaining > 1) {
    dst[3] = src[0];
    dst[7] = src[1];
    src += 2;
    dst += 8;
    remaining -= 2;
  }
  if ((remaining & 1) != 0) {
    dst[3] = src[0];
  }
}
2282 
2283 #ifdef __cplusplus
2284 }  // extern "C"
2285 }  // namespace libyuv
2286 #endif
2287