1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
12 #define INCLUDE_LIBYUV_ROW_H_
13 
14 #include <stdlib.h>  // For malloc.
15 
16 #include "libyuv/basic_types.h"
17 
18 #if defined(__native_client__)
19 #include "ppapi/c/pp_macros.h"  // For PPAPI_RELEASE
20 #endif
21 
22 #ifdef __cplusplus
23 namespace libyuv {
24 extern "C" {
25 #endif
26 
// Evaluates to true when |p| (a pointer or an integer address) is a multiple
// of |a|. The test masks the low bits with (a - 1), so it is only meaningful
// when |a| is a power of two.
#define IS_ALIGNED(p, a) (((uintptr_t)(p) & ((a) - 1)) == 0)
28 
// Declares two locals in the enclosing scope:
//   var##_mem - the raw malloc() result; this is the pointer that must be
//               released (see free_aligned_buffer_64).
//   var       - var##_mem rounded up to the next 64-byte boundary.
// 63 extra bytes are requested so that |size| bytes remain usable after the
// pointer has been rounded up. The malloc() result is not checked here.
// The rounding is done on uintptr_t (as IS_ALIGNED does) with a full-width
// mask, so no signed bit arithmetic is involved.
#ifdef __cplusplus
#define align_buffer_64(var, size)                                             \
  uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63));            \
  uint8* var = reinterpret_cast<uint8*>(                                       \
      (reinterpret_cast<uintptr_t>(var##_mem) + 63) &                          \
      ~static_cast<uintptr_t>(63))
#else
#define align_buffer_64(var, size)                                             \
  uint8* var##_mem = (uint8*)(malloc((size) + 63));               /* NOLINT */ \
  uint8* var = (uint8*)(((uintptr_t)(var##_mem) + 63) &           /* NOLINT */ \
                        ~(uintptr_t)63)                           /* NOLINT */
#endif
39 
// Releases a buffer declared with align_buffer_64(var, ...) and sets |var| to
// null. The pointer passed to free() is var##_mem (the raw malloc() result),
// not |var|, which points into the middle of that allocation.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and behaves correctly as the body of an unbraced if/else.
#define free_aligned_buffer_64(var) \
  do {                              \
    free(var##_mem);                \
    var = 0;                        \
  } while (0)
43 
// Switch off the x86 row functions when any of these macros is present:
// __pnacl__, __CLR_VER, COVERAGE_ENABLED, TARGET_IPHONE_SIMULATOR, or
// _MSC_VER together with __clang__.
// Later sections test !defined(LIBYUV_DISABLE_X86), so a build may already have
// defined the macro itself; it is only defined here when not yet set, which
// avoids a macro redefinition diagnostic.
// NOTE(review): only the presence of TARGET_IPHONE_SIMULATOR is tested, not
// its value. If a header defines it to 0 on non-simulator targets this still
// disables x86 code - confirm against the build configuration.
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
     defined(TARGET_IPHONE_SIMULATOR) || \
     (defined(_MSC_VER) && defined(__clang__)))
#define LIBYUV_DISABLE_X86
#endif
// LIBYUV_SSSE3_ONLY is defined when the compiler itself is targeting SSSE3,
// detected either through __SSSE3__ or through _M_IX86_FP being 3 or greater.
#if defined(__SSSE3__)
#define LIBYUV_SSSE3_ONLY
#elif defined(_M_IX86_FP) && (_M_IX86_FP >= 3)
#define LIBYUV_SSSE3_ONLY
#endif
53 
// Native Client settings, keyed on the Pepper release (PPAPI_RELEASE):
//  - release 33 and newer: NEW_BINUTILS, for bundle and AVX2 support.
//  - ARM builds older than release 37: Neon code is disabled.
#if defined(__native_client__)
#if PPAPI_RELEASE >= 33
#define NEW_BINUTILS
#endif
#if defined(__arm__) && PPAPI_RELEASE < 37
#define LIBYUV_DISABLE_NEON
#endif
#endif  // defined(__native_client__)
61 
62 // The following are available on all x86 platforms:
63 #if !defined(LIBYUV_DISABLE_X86) && \
64     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
65 // Effects:
66 #define HAS_ARGBADDROW_SSE2
67 #define HAS_ARGBAFFINEROW_SSE2
68 #define HAS_ARGBATTENUATEROW_SSSE3
69 #define HAS_ARGBBLENDROW_SSSE3
70 #define HAS_ARGBCOLORMATRIXROW_SSSE3
71 #define HAS_ARGBCOLORTABLEROW_X86
72 #define HAS_ARGBCOPYALPHAROW_SSE2
73 #define HAS_ARGBCOPYYTOALPHAROW_SSE2
74 #define HAS_ARGBGRAYROW_SSSE3
75 #define HAS_ARGBLUMACOLORTABLEROW_SSSE3
76 #define HAS_ARGBMIRRORROW_SSSE3
77 #define HAS_ARGBMULTIPLYROW_SSE2
78 #define HAS_ARGBPOLYNOMIALROW_SSE2
79 #define HAS_ARGBQUANTIZEROW_SSE2
80 #define HAS_ARGBSEPIAROW_SSSE3
81 #define HAS_ARGBSHADEROW_SSE2
82 #define HAS_ARGBSUBTRACTROW_SSE2
83 #define HAS_ARGBTOUVROW_SSSE3
84 #define HAS_ARGBUNATTENUATEROW_SSE2
85 #define HAS_COMPUTECUMULATIVESUMROW_SSE2
86 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
87 #define HAS_INTERPOLATEROW_SSE2
88 #define HAS_INTERPOLATEROW_SSSE3
89 #define HAS_RGBCOLORTABLEROW_X86
90 #define HAS_SOBELROW_SSE2
91 #define HAS_SOBELTOPLANEROW_SSE2
92 #define HAS_SOBELXROW_SSE2
93 #define HAS_SOBELXYROW_SSE2
94 #define HAS_SOBELYROW_SSE2
95 
96 // Conversions:
97 #define HAS_ABGRTOUVROW_SSSE3
98 #define HAS_ABGRTOYROW_SSSE3
99 #define HAS_ARGB1555TOARGBROW_SSE2
100 #define HAS_ARGB4444TOARGBROW_SSE2
101 #define HAS_ARGBSHUFFLEROW_SSE2
102 #define HAS_ARGBSHUFFLEROW_SSSE3
103 #define HAS_ARGBTOARGB1555ROW_SSE2
104 #define HAS_ARGBTOARGB4444ROW_SSE2
105 #define HAS_ARGBTOBAYERGGROW_SSE2
106 #define HAS_ARGBTOBAYERROW_SSSE3
107 #define HAS_ARGBTORAWROW_SSSE3
108 #define HAS_ARGBTORGB24ROW_SSSE3
109 #define HAS_ARGBTORGB565ROW_SSE2
110 #define HAS_ARGBTOUV422ROW_SSSE3
111 #define HAS_ARGBTOUV444ROW_SSSE3
112 #define HAS_ARGBTOUVJROW_SSSE3
113 #define HAS_ARGBTOYJROW_SSSE3
114 #define HAS_ARGBTOYROW_SSSE3
115 #define HAS_BGRATOUVROW_SSSE3
116 #define HAS_BGRATOYROW_SSSE3
117 #define HAS_COPYROW_ERMS
118 #define HAS_COPYROW_SSE2
119 #define HAS_COPYROW_X86
120 #define HAS_HALFROW_SSE2
121 #define HAS_I400TOARGBROW_SSE2
122 #define HAS_I411TOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
126 #define HAS_I422TOARGB4444ROW_SSSE3
127 #define HAS_I422TOARGBROW_SSSE3
128 #define HAS_I422TOBGRAROW_SSSE3
129 #define HAS_I422TORAWROW_SSSE3
130 #define HAS_I422TORGB24ROW_SSSE3
131 #define HAS_I422TORGB565ROW_SSSE3
132 #define HAS_I422TORGBAROW_SSSE3
133 #define HAS_I422TOUYVYROW_SSE2
134 #define HAS_I422TOYUY2ROW_SSE2
135 #define HAS_I444TOARGBROW_SSSE3
136 #define HAS_MERGEUVROW_SSE2
137 #define HAS_MIRRORROW_SSE2
138 #define HAS_MIRRORROW_SSSE3
139 #define HAS_MIRRORROW_UV_SSSE3
140 #define HAS_MIRRORUVROW_SSSE3
141 #define HAS_NV12TOARGBROW_SSSE3
142 #define HAS_NV12TORGB565ROW_SSSE3
143 #define HAS_NV21TOARGBROW_SSSE3
144 #define HAS_NV21TORGB565ROW_SSSE3
145 #define HAS_RAWTOARGBROW_SSSE3
146 #define HAS_RAWTOYROW_SSSE3
147 #define HAS_RGB24TOARGBROW_SSSE3
148 #define HAS_RGB24TOYROW_SSSE3
149 #define HAS_RGB565TOARGBROW_SSE2
150 #define HAS_RGBATOUVROW_SSSE3
151 #define HAS_RGBATOYROW_SSSE3
152 #define HAS_SETROW_X86
153 #define HAS_SPLITUVROW_SSE2
154 #define HAS_UYVYTOARGBROW_SSSE3
155 #define HAS_UYVYTOUV422ROW_SSE2
156 #define HAS_UYVYTOUVROW_SSE2
157 #define HAS_UYVYTOYROW_SSE2
158 #define HAS_YTOARGBROW_SSE2
159 #define HAS_YUY2TOARGBROW_SSSE3
160 #define HAS_YUY2TOUV422ROW_SSE2
161 #define HAS_YUY2TOUVROW_SSE2
162 #define HAS_YUY2TOYROW_SSE2
163 #endif
164 
// Available on x64 Visual C. The all-x86 list earlier in this file tests
// _M_IX86, __x86_64__ and __i386__ but not _M_X64, so this row function is
// enabled separately for _M_X64 unless x86 code is disabled.
#if defined(_M_X64) && !defined(LIBYUV_DISABLE_X86)
#define HAS_I422TOARGBROW_SSSE3
#endif
169 
// Compiler capability probes for AVX2 code. Each *_HAS_AVX2 macro is defined
// to 1 when the matching compiler is recent enough and is left undefined
// otherwise.

// GCC >= 4.7.0 required for AVX2 (x86 / x64 targets only).
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) && \
    ((__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)))
#define GCC_HAS_AVX2 1
#endif  // GNUC >= 4.7

// clang >= 3.4.0 required for AVX2 (x86 / x64 targets only).
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) && \
    ((__clang_major__ > 3) || \
     (__clang_major__ == 3 && (__clang_minor__ >= 4)))
#define CLANG_HAS_AVX2 1
#endif  // clang >= 3.4

// Visual C 2012 (_MSC_VER >= 1700) required for AVX2; 32-bit (_M_IX86) only.
#if defined(_MSC_VER) && (_MSC_VER >= 1700) && defined(_M_IX86)
#define VISUALC_HAS_AVX2 1
#endif  // VisualStudio >= 2012
188 
189 // The following are available on all x86 platforms, but
190 // require VS2012, clang 3.4 or gcc 4.7.
191 // The code supports NaCL but requires a new compiler and validator.
192 #if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
193     defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
194 // Effects:
195 #define HAS_ARGBPOLYNOMIALROW_AVX2
196 #define HAS_ARGBSHUFFLEROW_AVX2
197 #define HAS_ARGBCOPYALPHAROW_AVX2
198 #define HAS_ARGBCOPYYTOALPHAROW_AVX2
199 #endif
200 
// The following require VS2012.
202 // TODO(fbarchard): Port to gcc.
203 #if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
204 #define HAS_ARGBTOUVROW_AVX2
205 #define HAS_ARGBTOYJROW_AVX2
206 #define HAS_ARGBTOYROW_AVX2
207 #define HAS_HALFROW_AVX2
208 #define HAS_I422TOARGBROW_AVX2
209 #define HAS_INTERPOLATEROW_AVX2
210 #define HAS_MERGEUVROW_AVX2
211 #define HAS_MIRRORROW_AVX2
212 #define HAS_SPLITUVROW_AVX2
213 #define HAS_UYVYTOUV422ROW_AVX2
214 #define HAS_UYVYTOUVROW_AVX2
215 #define HAS_UYVYTOYROW_AVX2
216 #define HAS_YUY2TOUV422ROW_AVX2
217 #define HAS_YUY2TOUVROW_AVX2
218 #define HAS_YUY2TOYROW_AVX2
219 
220 // Effects:
221 #define HAS_ARGBADDROW_AVX2
222 #define HAS_ARGBATTENUATEROW_AVX2
223 #define HAS_ARGBMIRRORROW_AVX2
224 #define HAS_ARGBMULTIPLYROW_AVX2
225 #define HAS_ARGBSUBTRACTROW_AVX2
226 #define HAS_ARGBUNATTENUATEROW_AVX2
227 #endif  // defined(VISUALC_HAS_AVX2)
228 
// The following are Yasm x86 only: they are enabled when the build defines
// HAVE_YASM, x86 code is not disabled, and the target is 32- or 64-bit x86.
// All three conditions are joined into a single #if expression; the
// architecture test is a continuation of the directive, not a separate line.
// TODO(fbarchard): Port AVX2 to inline.
#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM) && \
    (defined(_M_IX86) || defined(_M_X64) || \
    defined(__x86_64__) || defined(__i386__))
#define HAS_MERGEUVROW_AVX2
#define HAS_MERGEUVROW_MMX
#define HAS_SPLITUVROW_AVX2
#define HAS_SPLITUVROW_MMX
#define HAS_UYVYTOYROW_AVX2
#define HAS_UYVYTOYROW_MMX
#define HAS_YUY2TOYROW_AVX2
#define HAS_YUY2TOYROW_MMX
#endif
243 
// SSE2 variants that are switched off when SSSE3 is a compile-time requirement
// (LIBYUV_SSSE3_ONLY); otherwise they are enabled on x86 targets.
// NOTE(review): HAS_MIRRORROW_SSE2 is also defined unconditionally in the
// all-x86 list earlier in this file, so LIBYUV_SSSE3_ONLY does not actually
// remove it - confirm which behaviour is intended.
#if !defined(LIBYUV_SSSE3_ONLY) && !defined(LIBYUV_DISABLE_X86) && \
    (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
#define HAS_ARGBATTENUATEROW_SSE2
#define HAS_ARGBBLENDROW_SSE2
#define HAS_MIRRORROW_SSE2
#endif
252 
253 // The following are available on arm64 platforms:
254 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
255 #endif
256 
257 // The following are available on Neon platforms:
258 #if !defined(LIBYUV_DISABLE_NEON) && \
259     (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
260 #define HAS_ABGRTOUVROW_NEON
261 #define HAS_ABGRTOYROW_NEON
262 #define HAS_ARGB1555TOARGBROW_NEON
263 #define HAS_ARGB1555TOUVROW_NEON
264 #define HAS_ARGB1555TOYROW_NEON
265 #define HAS_ARGB4444TOARGBROW_NEON
266 #define HAS_ARGB4444TOUVROW_NEON
267 #define HAS_ARGB4444TOYROW_NEON
268 #define HAS_ARGBTOARGB1555ROW_NEON
269 #define HAS_ARGBTOARGB4444ROW_NEON
270 #define HAS_ARGBTOBAYERROW_NEON
271 #define HAS_ARGBTOBAYERGGROW_NEON
272 #define HAS_ARGBTORAWROW_NEON
273 #define HAS_ARGBTORGB24ROW_NEON
274 #define HAS_ARGBTORGB565ROW_NEON
275 #define HAS_ARGBTOUV411ROW_NEON
276 #define HAS_ARGBTOUV422ROW_NEON
277 #define HAS_ARGBTOUV444ROW_NEON
278 #define HAS_ARGBTOUVROW_NEON
279 #define HAS_ARGBTOUVJROW_NEON
280 #define HAS_ARGBTOYROW_NEON
281 #define HAS_ARGBTOYJROW_NEON
282 #define HAS_BGRATOUVROW_NEON
283 #define HAS_BGRATOYROW_NEON
284 #define HAS_COPYROW_NEON
285 #define HAS_HALFROW_NEON
286 #define HAS_I400TOARGBROW_NEON
287 #define HAS_I411TOARGBROW_NEON
288 #define HAS_I422TOABGRROW_NEON
289 #define HAS_I422TOARGB1555ROW_NEON
290 #define HAS_I422TOARGB4444ROW_NEON
291 #define HAS_I422TOARGBROW_NEON
292 #define HAS_I422TOBGRAROW_NEON
293 #define HAS_I422TORAWROW_NEON
294 #define HAS_I422TORGB24ROW_NEON
295 #define HAS_I422TORGB565ROW_NEON
296 #define HAS_I422TORGBAROW_NEON
297 #define HAS_I422TOUYVYROW_NEON
298 #define HAS_I422TOYUY2ROW_NEON
299 #define HAS_I444TOARGBROW_NEON
300 #define HAS_MERGEUVROW_NEON
301 #define HAS_MIRRORROW_NEON
302 #define HAS_MIRRORUVROW_NEON
303 #define HAS_NV12TOARGBROW_NEON
304 #define HAS_NV12TORGB565ROW_NEON
305 #define HAS_NV21TOARGBROW_NEON
306 #define HAS_NV21TORGB565ROW_NEON
307 #define HAS_RAWTOARGBROW_NEON
308 #define HAS_RAWTOUVROW_NEON
309 #define HAS_RAWTOYROW_NEON
310 #define HAS_RGB24TOARGBROW_NEON
311 #define HAS_RGB24TOUVROW_NEON
312 #define HAS_RGB24TOYROW_NEON
313 #define HAS_RGB565TOARGBROW_NEON
314 #define HAS_RGB565TOUVROW_NEON
315 #define HAS_RGB565TOYROW_NEON
316 #define HAS_RGBATOUVROW_NEON
317 #define HAS_RGBATOYROW_NEON
318 #define HAS_SETROW_NEON
319 #define HAS_SPLITUVROW_NEON
320 #define HAS_UYVYTOARGBROW_NEON
321 #define HAS_UYVYTOUV422ROW_NEON
322 #define HAS_UYVYTOUVROW_NEON
323 #define HAS_UYVYTOYROW_NEON
324 #define HAS_YTOARGBROW_NEON
325 #define HAS_YUY2TOARGBROW_NEON
326 #define HAS_YUY2TOUV422ROW_NEON
327 #define HAS_YUY2TOUVROW_NEON
328 #define HAS_YUY2TOYROW_NEON
329 
330 // Effects:
331 #define HAS_ARGBADDROW_NEON
332 #define HAS_ARGBATTENUATEROW_NEON
333 #define HAS_ARGBBLENDROW_NEON
334 #define HAS_ARGBGRAYROW_NEON
335 #define HAS_ARGBMIRRORROW_NEON
336 #define HAS_ARGBMULTIPLYROW_NEON
337 #define HAS_ARGBQUANTIZEROW_NEON
338 #define HAS_ARGBSEPIAROW_NEON
339 #define HAS_ARGBSHADEROW_NEON
340 #define HAS_ARGBSUBTRACTROW_NEON
341 #define HAS_SOBELROW_NEON
342 #define HAS_SOBELTOPLANEROW_NEON
343 #define HAS_SOBELXYROW_NEON
344 #define HAS_SOBELXROW_NEON
345 #define HAS_SOBELYROW_NEON
346 #define HAS_INTERPOLATEROW_NEON
347 // TODO(fbarchard): Investigate neon unittest failure.
348 // #define HAS_ARGBCOLORMATRIXROW_NEON
349 #endif
350 
351 // The following are available on Mips platforms:
352 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
353     (_MIPS_SIM == _MIPS_SIM_ABI32)
354 #define HAS_COPYROW_MIPS
355 #if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
356 #define HAS_I422TOABGRROW_MIPS_DSPR2
357 #define HAS_I422TOARGBROW_MIPS_DSPR2
358 #define HAS_I422TOBGRAROW_MIPS_DSPR2
359 #define HAS_INTERPOLATEROWS_MIPS_DSPR2
360 #define HAS_MIRRORROW_MIPS_DSPR2
361 #define HAS_MIRRORUVROW_MIPS_DSPR2
362 #define HAS_SPLITUVROW_MIPS_DSPR2
363 #endif
364 #endif
365 
366 #if defined(_MSC_VER) && !defined(__CLR_VER)
367 #define SIMD_ALIGNED(var) __declspec(align(16)) var
368 typedef __declspec(align(16)) int16 vec16[8];
369 typedef __declspec(align(16)) int32 vec32[4];
370 typedef __declspec(align(16)) int8 vec8[16];
371 typedef __declspec(align(16)) uint16 uvec16[8];
372 typedef __declspec(align(16)) uint32 uvec32[4];
373 typedef __declspec(align(16)) uint8 uvec8[16];
374 typedef __declspec(align(32)) int16 lvec16[16];
375 typedef __declspec(align(32)) int32 lvec32[8];
376 typedef __declspec(align(32)) int8 lvec8[32];
377 typedef __declspec(align(32)) uint16 ulvec16[16];
378 typedef __declspec(align(32)) uint32 ulvec32[8];
379 typedef __declspec(align(32)) uint8 ulvec8[32];
380 
381 #elif defined(__GNUC__)
382 // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
383 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
384 typedef int16 __attribute__((vector_size(16))) vec16;
385 typedef int32 __attribute__((vector_size(16))) vec32;
386 typedef int8 __attribute__((vector_size(16))) vec8;
387 typedef uint16 __attribute__((vector_size(16))) uvec16;
388 typedef uint32 __attribute__((vector_size(16))) uvec32;
389 typedef uint8 __attribute__((vector_size(16))) uvec8;
390 #else
391 #define SIMD_ALIGNED(var) var
392 typedef int16 vec16[8];
393 typedef int32 vec32[4];
394 typedef int8 vec8[16];
395 typedef uint16 uvec16[8];
396 typedef uint32 uvec32[4];
397 typedef uint8 uvec8[16];
398 #endif
399 
// OMITFP expands to the optimize("omit-frame-pointer") function attribute,
// except when __APPLE__, __x86_64__ or __llvm__ is defined, where it expands
// to nothing.
#if !defined(__APPLE__) && !defined(__x86_64__) && !defined(__llvm__)
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#else
#define OMITFP
#endif
405 
// Operand-building macros for GCC-style x86 / x64 inline assembly. Each macro
// pastes its arguments into a string literal fragment of an asm template.
// Native Client (NaCl) x86-64 builds get sandboxed variants of the same
// macros; every other build gets plain addressing expressions.

// TODO(nfullagar): When pepper_33 toolchain is distributed, default to
// NEW_BINUTILS and remove all BUNDLEALIGN occurrences.

// Alignment directive placed before asm labels.
#if !defined(__native_client__)
#define LABELALIGN ".p2align 2\n"
#else
#define LABELALIGN ".p2align 5\n"
#endif

#if !(defined(__native_client__) && defined(__x86_64__))
// Regular builds: operands are ordinary offset(base,index,scale) expressions.
// MEMMOVESTRING and MEMSTORESTRING expand to nothing here.
#define BUNDLEALIGN "\n"
#define MEMACCESS(base) "(%" #base ")"
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
#define MEMLEA(offset, base) #offset "(%" #base ")"
#define MEMLEA3(offset, index, scale) #offset "(,%" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \
    #offset "(%" #base ",%" #index "," #scale ")"
#define MEMMOVESTRING(s, d)
#define MEMSTORESTRING(reg, d)
// opcode offset(base,index,scale),%reg
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
// opcode %reg,offset(base,index,scale)
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    #opcode " %%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n"
// opcode offset(base,index,scale),<asm operand arg>
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
#else  // defined(__native_client__) && defined(__x86_64__)
// NaCl x86-64: memory operands are expressed relative to %r15, and indexed
// accesses first compute the address into %r14d with lea. With NEW_BINUTILS
// that two-instruction sequence is wrapped in .bundle_lock / .bundle_unlock;
// without it BUNDLEALIGN carries a .p2align 5 instead.
#if !defined(NEW_BINUTILS)
#define BUNDLELOCK "\n"
#define BUNDLEUNLOCK "\n"
#define BUNDLEALIGN ".p2align 5\n"
#else
#define BUNDLELOCK ".bundle_lock\n"
#define BUNDLEUNLOCK ".bundle_unlock\n"
#define BUNDLEALIGN "\n"
#endif
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
#define MEMLEA(offset, base) #offset "(%q" #base ")"
#define MEMLEA3(offset, index, scale) \
    #offset "(,%q" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \
    #offset "(%q" #base ",%q" #index "," #scale ")"
#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
    BUNDLELOCK \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    #opcode " (%%r15,%%r14),%%" #reg "\n" \
    BUNDLEUNLOCK
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    BUNDLELOCK \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    #opcode " %%" #reg ",(%%r15,%%r14)\n" \
    BUNDLEUNLOCK
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
    BUNDLELOCK \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    #opcode " (%%r15,%%r14),%" #arg "\n" \
    BUNDLEUNLOCK
#endif  // defined(__native_client__) && defined(__x86_64__)
467 
// ARM builds replace MEMACCESS: outside Native Client it is just a newline;
// under Native Client it emits an alignment directive followed by a bic that
// clears the top two bits (0xc0000000) of the base register.
#if defined(__arm__)
#undef MEMACCESS
#if !defined(__native_client__)
#define MEMACCESS(base) "\n"
#else
#define MEMACCESS(base) ".p2align   3\nbic %" #base ", #0xc0000000\n"
#endif
#endif  // defined(__arm__)
476 
477 void I444ToARGBRow_NEON(const uint8* src_y,
478                         const uint8* src_u,
479                         const uint8* src_v,
480                         uint8* dst_argb,
481                         int width);
482 void I422ToARGBRow_NEON(const uint8* src_y,
483                         const uint8* src_u,
484                         const uint8* src_v,
485                         uint8* dst_argb,
486                         int width);
487 void I411ToARGBRow_NEON(const uint8* src_y,
488                         const uint8* src_u,
489                         const uint8* src_v,
490                         uint8* dst_argb,
491                         int width);
492 void I422ToBGRARow_NEON(const uint8* src_y,
493                         const uint8* src_u,
494                         const uint8* src_v,
495                         uint8* dst_bgra,
496                         int width);
497 void I422ToABGRRow_NEON(const uint8* src_y,
498                         const uint8* src_u,
499                         const uint8* src_v,
500                         uint8* dst_abgr,
501                         int width);
502 void I422ToRGBARow_NEON(const uint8* src_y,
503                         const uint8* src_u,
504                         const uint8* src_v,
505                         uint8* dst_rgba,
506                         int width);
507 void I422ToRGB24Row_NEON(const uint8* src_y,
508                          const uint8* src_u,
509                          const uint8* src_v,
510                          uint8* dst_rgb24,
511                          int width);
512 void I422ToRAWRow_NEON(const uint8* src_y,
513                        const uint8* src_u,
514                        const uint8* src_v,
515                        uint8* dst_raw,
516                        int width);
517 void I422ToRGB565Row_NEON(const uint8* src_y,
518                           const uint8* src_u,
519                           const uint8* src_v,
520                           uint8* dst_rgb565,
521                           int width);
522 void I422ToARGB1555Row_NEON(const uint8* src_y,
523                             const uint8* src_u,
524                             const uint8* src_v,
525                             uint8* dst_argb1555,
526                             int width);
527 void I422ToARGB4444Row_NEON(const uint8* src_y,
528                             const uint8* src_u,
529                             const uint8* src_v,
530                             uint8* dst_argb4444,
531                             int width);
532 void NV12ToARGBRow_NEON(const uint8* src_y,
533                         const uint8* src_uv,
534                         uint8* dst_argb,
535                         int width);
536 void NV21ToARGBRow_NEON(const uint8* src_y,
537                         const uint8* src_vu,
538                         uint8* dst_argb,
539                         int width);
540 void NV12ToRGB565Row_NEON(const uint8* src_y,
541                           const uint8* src_uv,
542                           uint8* dst_rgb565,
543                           int width);
544 void NV21ToRGB565Row_NEON(const uint8* src_y,
545                           const uint8* src_vu,
546                           uint8* dst_rgb565,
547                           int width);
548 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
549                         uint8* dst_argb,
550                         int width);
551 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
552                         uint8* dst_argb,
553                         int width);
554 
555 void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
556 void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
557 void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
558 void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
559 void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
560 void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
561 void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
562 void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
563 void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
564 void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
565 void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
566 void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
567 void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
568 void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
569 void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
570 void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
571 void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
572 void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
573 void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
574 void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
575 void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
576                          int pix);
577 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
578                          int pix);
579 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
580                          int pix);
581 void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
582                       uint8* dst_u, uint8* dst_v, int pix);
583 void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
584                        uint8* dst_u, uint8* dst_v, int pix);
585 void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
586                       uint8* dst_u, uint8* dst_v, int pix);
587 void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
588                       uint8* dst_u, uint8* dst_v, int pix);
589 void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
590                       uint8* dst_u, uint8* dst_v, int pix);
591 void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
592                        uint8* dst_u, uint8* dst_v, int pix);
593 void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
594                      uint8* dst_u, uint8* dst_v, int pix);
595 void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
596                         uint8* dst_u, uint8* dst_v, int pix);
597 void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
598                           uint8* dst_u, uint8* dst_v, int pix);
599 void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
600                           uint8* dst_u, uint8* dst_v, int pix);
601 void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
602 void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
603 void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
604 void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
605 void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
606 void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
607 void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
608 void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
609 void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
610 void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
611 void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
612 void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
613 void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
614 void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
615 void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
616 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
617 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
618 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
619 void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
620 void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
621 void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
622 void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
623 void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
624 void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
625 void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
626 void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
627 void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
628 void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
629 void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
630 void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
631 void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
632 void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
633 void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
634 void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
635 void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
636 
637 void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
638                       uint8* dst_u, uint8* dst_v, int width);
639 void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
640                           uint8* dst_u, uint8* dst_v, int width);
641 void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
642                        uint8* dst_u, uint8* dst_v, int width);
643 void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
644                         uint8* dst_u, uint8* dst_v, int width);
645 void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
646                        uint8* dst_u, uint8* dst_v, int width);
647 void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
648                        uint8* dst_u, uint8* dst_v, int width);
649 void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
650                        uint8* dst_u, uint8* dst_v, int width);
651 void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
652                                  uint8* dst_u, uint8* dst_v, int width);
653 void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
654                                   uint8* dst_u, uint8* dst_v, int width);
655 void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
656                                  uint8* dst_u, uint8* dst_v, int width);
657 void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
658                                  uint8* dst_u, uint8* dst_v, int width);
659 void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
660                                  uint8* dst_u, uint8* dst_v, int width);
661 void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
662                            uint8* dst_u, uint8* dst_v, int width);
663 void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
664                             uint8* dst_u, uint8* dst_v, int width);
665 void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
666                            uint8* dst_u, uint8* dst_v, int width);
667 void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
668                            uint8* dst_u, uint8* dst_v, int width);
669 void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
670                            uint8* dst_u, uint8* dst_v, int width);
671 void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
672                              int pix);
673 void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
674                              int pix);
675 void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
676                              int pix);
677 void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
678                           uint8* dst_u, uint8* dst_v, int pix);
// *ToUVRow prototypes (_Any_NEON and _C suffixes). Each takes a const source
// row pointer, a source stride, non-const dst_u/dst_v pointers and a length.
// NOTE(review): the _Any_NEON prototypes name the length `pix` while the _C
// prototypes name it `width`; presumably the same quantity - confirm against
// the definitions, which are not in this header.
679 void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
680                            uint8* dst_u, uint8* dst_v, int pix);
681 void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
682                           uint8* dst_u, uint8* dst_v, int pix);
683 void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
684                           uint8* dst_u, uint8* dst_v, int pix);
685 void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
686                           uint8* dst_u, uint8* dst_v, int pix);
687 void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
688                            uint8* dst_u, uint8* dst_v, int pix);
689 void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
690                          uint8* dst_u, uint8* dst_v, int pix);
691 void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
692                             uint8* dst_u, uint8* dst_v, int pix);
693 void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
694                               int src_stride_argb1555,
695                               uint8* dst_u, uint8* dst_v, int pix);
696 void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
697                               int src_stride_argb4444,
698                               uint8* dst_u, uint8* dst_v, int pix);
// C-suffixed counterparts: same parameter order, length named `width`.
699 void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
700                    uint8* dst_u, uint8* dst_v, int width);
701 void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
702                     uint8* dst_u, uint8* dst_v, int width);
703 void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
704                    uint8* dst_u, uint8* dst_v, int width);
705 void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
706                    uint8* dst_u, uint8* dst_v, int width);
707 void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
708                    uint8* dst_u, uint8* dst_v, int width);
709 void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
710                     uint8* dst_u, uint8* dst_v, int width);
711 void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
712                   uint8* dst_u, uint8* dst_v, int width);
713 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
714                      uint8* dst_u, uint8* dst_v, int width);
715 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
716                        uint8* dst_u, uint8* dst_v, int width);
717 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
718                        uint8* dst_u, uint8* dst_v, int width);
719 
// ARGBToUV444Row, SSSE3-suffixed: plain, _Unaligned and _Any variants share
// one signature (const src_argb, dst_u, dst_v, width). There is no source
// stride parameter here.
720 void ARGBToUV444Row_SSSE3(const uint8* src_argb,
721                           uint8* dst_u, uint8* dst_v, int width);
722 void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
723                                     uint8* dst_u, uint8* dst_v, int width);
724 void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
725                               uint8* dst_u, uint8* dst_v, int width);
726 
// ARGBToUV422Row, SSSE3-suffixed: plain, _Unaligned and _Any variants share
// one signature (const src_argb, dst_u, dst_v, width); no stride parameter.
727 void ARGBToUV422Row_SSSE3(const uint8* src_argb,
728                           uint8* dst_u, uint8* dst_v, int width);
729 void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
730                                     uint8* dst_u, uint8* dst_v, int width);
731 void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
732                               uint8* dst_u, uint8* dst_v, int width);
733 
// C-suffixed ARGBToUV444/422/411Row prototypes, all with the signature
// (const src_argb, dst_u, dst_v, width).
// NOTE(review): only the 444 and 422 forms have SSSE3-suffixed prototypes in
// the adjacent groups; 411 appears here with the _C suffix only.
734 void ARGBToUV444Row_C(const uint8* src_argb,
735                       uint8* dst_u, uint8* dst_v, int width);
736 void ARGBToUV422Row_C(const uint8* src_argb,
737                       uint8* dst_u, uint8* dst_v, int width);
738 void ARGBToUV411Row_C(const uint8* src_argb,
739                       uint8* dst_u, uint8* dst_v, int width);
740 
// MirrorRow prototypes, one per suffix (AVX2, SSSE3, SSE2, NEON, MIPS_DSPR2,
// C), all with the identical signature (const src, dst, width).
// Presumably the suffix names the instruction set each implementation
// targets; the definitions are not part of this header.
741 void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
742 void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
743 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
744 void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
745 void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
746 void MirrorRow_C(const uint8* src, uint8* dst, int width);
747 
// MirrorUVRow prototypes (SSSE3, NEON, MIPS_DSPR2, C): a single const src_uv
// input with separate non-const dst_u and dst_v pointers, plus a width.
748 void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
749                        int width);
750 void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
751                       int width);
752 void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
753                             int width);
754 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
755                    int width);
756 
// ARGBMirrorRow prototypes (AVX2, SSSE3, NEON, C), all (const src, dst, width).
// NOTE(review): whether width counts pixels or bytes is not stated here -
// confirm against the definitions.
757 void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
758 void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
759 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
760 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
761 
// SplitUVRow prototypes: one const src_uv input, separate dst_u and dst_v
// pointers, and a length named `pix`. Declared with C, SSE2, AVX2, NEON and
// MIPS_DSPR2 suffixes, plus _Unaligned_ forms (SSE2, MIPS_DSPR2) and _Any_
// forms (SSE2, AVX2, NEON, MIPS_DSPR2).
762 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
763 void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
764 void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
765 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
766 void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
767                            int pix);
768 void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
769                                int pix);
770 void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
771                                      uint8* dst_v, int pix);
772 void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
773                          int pix);
774 void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
775                          int pix);
776 void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
777                          int pix);
778 void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
779                                int pix);
780 
// MergeUVRow prototypes: two const inputs (src_u, src_v), one dst_uv pointer
// and a length named `width`. Declared with C, SSE2, AVX2 and NEON suffixes,
// an _Unaligned_SSE2 form, and _Any_ forms for SSE2, AVX2 and NEON.
781 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
782                   int width);
783 void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
784                      int width);
785 void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
786                      int width);
787 void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
788                      int width);
789 void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
790                                uint8* dst_uv, int width);
791 void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
792                          int width);
793 void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
794                          int width);
795 void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
796                          int width);
797 
// CopyRow prototypes (SSE2, ERMS, X86, NEON, MIPS, C), all with the signature
// (const uint8* src, uint8* dst, int count).
// NOTE(review): presumably count is in bytes since the pointers are uint8* -
// confirm against the definitions.
798 void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
799 void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
800 void CopyRow_X86(const uint8* src, uint8* dst, int count);
801 void CopyRow_NEON(const uint8* src, uint8* dst, int count);
802 void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
803 void CopyRow_C(const uint8* src, uint8* dst, int count);
804 
// uint16 counterpart of CopyRow_C: same parameter order, uint16 pointers.
// NOTE(review): presumably count is in uint16 elements, not bytes - confirm.
805 void CopyRow_16_C(const uint16* src, uint16* dst, int count);
806 
// ARGBCopyAlphaRow prototypes (C, SSE2, AVX2): source and destination are
// both named as ARGB rows (src_argb, dst_argb), plus a width.
807 void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
808 void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
809 void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
810 
// ARGBCopyYToAlphaRow prototypes (C, SSE2, AVX2): the source is named src_y
// and the destination dst_argb, plus a width.
811 void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
812 void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
813 void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
814 
// SetRow / ARGBSetRows prototypes (X86, NEON, C). SetRow takes
// (dst, v32, count); ARGBSetRows takes (dst, v32, width, dst_stride, height),
// i.e. it also carries a stride and a row count.
// NOTE(review): SetRow receives a 32-bit value for a uint8* destination; how
// v32 is applied per byte is not visible here - confirm in the definitions.
815 void SetRow_X86(uint8* dst, uint32 v32, int count);
816 void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
817                      int dst_stride, int height);
818 void SetRow_NEON(uint8* dst, uint32 v32, int count);
819 void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
820                       int dst_stride, int height);
821 void SetRow_C(uint8* dst, uint32 v32, int count);
822 void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
823                    int height);
824 
825 // ARGBShufflers for BGRAToARGB etc.
// All variants share the signature (const src_argb, dst_argb, const shuffler,
// pix). Declared with C, SSE2, SSSE3, AVX2 and NEON suffixes, an
// _Unaligned_SSSE3 form, and _Any_ forms for SSE2, SSSE3, AVX2 and NEON.
// NOTE(review): the layout and length of the `shuffler` table are not
// described in this header - confirm against the definitions.
826 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
827                       const uint8* shuffler, int pix);
828 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
829                          const uint8* shuffler, int pix);
830 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
831                           const uint8* shuffler, int pix);
832 void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
833                          const uint8* shuffler, int pix);
834 void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
835                          const uint8* shuffler, int pix);
836 void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
837                                     const uint8* shuffler, int pix);
838 void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
839                              const uint8* shuffler, int pix);
840 void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
841                               const uint8* shuffler, int pix);
842 void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
843                              const uint8* shuffler, int pix);
844 void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
845                              const uint8* shuffler, int pix);
846 
// *ToARGBRow prototypes with x86 suffixes: RGB24 and RAW are declared with an
// SSSE3 suffix; RGB565, ARGB1555 and ARGB4444 with an SSE2 suffix. All take
// (const source, dst_argb, pix).
847 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
848 void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
849 void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
850 void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
851                             int pix);
852 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
853                             int pix);
854 
// *ToARGBRow prototypes with NEON, C, _Any_SSSE3/_Any_SSE2 and _Any_NEON
// suffixes. All take (const source, dst_argb, pix).
855 void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
856 void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
857 void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
858 void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
859                             int pix);
860 void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
861                             int pix);
// NOTE(review): the three _C prototypes for RGB565/ARGB1555/ARGB4444 name the
// source `src_rgb` / `src_argb` rather than the format-specific names used by
// their SIMD siblings. Prototype parameter names do not affect callers, but
// the mismatch may mislead readers.
862 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
863 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
864 void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
865 void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
866 void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
867 void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
868 void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
869 void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
870                               int pix);
871 void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
872                                 int pix);
873 void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
874                                 int pix);
875 void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
876 void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
877 void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
878                               int pix);
879 void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
880                                 int pix);
881 void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
882                                 int pix);
883 
// ARGBTo*Row prototypes with x86 suffixes (SSSE3 for RGB24/RAW, SSE2 for
// RGB565/ARGB1555/ARGB4444). All take (const src_argb, dst_rgb, pix); the
// destination is named dst_rgb regardless of the target format.
884 void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
885 void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
886 void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
887 void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
888 void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
889 
// NEON-suffixed ARGBTo*Row prototypes: (const src_argb, dst_rgb, pix).
890 void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
891 void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
892 void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
893 void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
894 void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
895 
// C-suffixed ARGBTo*Row prototypes: (const src_argb, dst_rgb, pix).
// NOTE(review): ARGBToRGBARow has a _C prototype here but no SSSE3/SSE2/NEON
// sibling in the adjacent groups.
896 void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
897 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
898 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
899 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
900 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
901 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
902 
// I400ToARGBRow prototypes (SSE2, _Unaligned_SSE2, NEON, C, _Any_SSE2,
// _Any_NEON): a single const src_y input, a dst_argb output and a length
// named `pix`.
903 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
904 void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
905 void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
906 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
907 void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
908 void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
909 
// C-suffixed YUV-to-RGB row prototypes. Three source shapes appear:
//   I4xx*  - separate src_y, src_u, src_v pointers;
//   NV12/NV21 - src_y plus one interleaved pointer (src_uv or src_vu);
//   YUY2/UYVY - a single packed source pointer.
// Every prototype ends with (destination pointer, int width).
910 void I444ToARGBRow_C(const uint8* src_y,
911                      const uint8* src_u,
912                      const uint8* src_v,
913                      uint8* dst_argb,
914                      int width);
915 void I422ToARGBRow_C(const uint8* src_y,
916                      const uint8* src_u,
917                      const uint8* src_v,
918                      uint8* dst_argb,
919                      int width);
920 void I411ToARGBRow_C(const uint8* src_y,
921                      const uint8* src_u,
922                      const uint8* src_v,
923                      uint8* dst_argb,
924                      int width);
925 void NV12ToARGBRow_C(const uint8* src_y,
926                      const uint8* src_uv,
927                      uint8* dst_argb,
928                      int width);
// NOTE(review): the two *ToRGB565Row_C prototypes below name the destination
// `dst_argb` although the function names say RGB565.
929 void NV21ToRGB565Row_C(const uint8* src_y,
930                        const uint8* src_vu,
931                        uint8* dst_argb,
932                        int width);
933 void NV12ToRGB565Row_C(const uint8* src_y,
934                        const uint8* src_uv,
935                        uint8* dst_argb,
936                        int width);
937 void NV21ToARGBRow_C(const uint8* src_y,
938                      const uint8* src_vu,
939                      uint8* dst_argb,
940                      int width);
941 void YUY2ToARGBRow_C(const uint8* src_yuy2,
942                      uint8* dst_argb,
943                      int width);
944 void UYVYToARGBRow_C(const uint8* src_uyvy,
945                      uint8* dst_argb,
946                      int width);
947 void I422ToBGRARow_C(const uint8* src_y,
948                      const uint8* src_u,
949                      const uint8* src_v,
950                      uint8* dst_bgra,
951                      int width);
952 void I422ToABGRRow_C(const uint8* src_y,
953                      const uint8* src_u,
954                      const uint8* src_v,
955                      uint8* dst_abgr,
956                      int width);
957 void I422ToRGBARow_C(const uint8* src_y,
958                      const uint8* src_u,
959                      const uint8* src_v,
960                      uint8* dst_rgba,
961                      int width);
962 void I422ToRGB24Row_C(const uint8* src_y,
963                       const uint8* src_u,
964                       const uint8* src_v,
965                       uint8* dst_rgb24,
966                       int width);
967 void I422ToRAWRow_C(const uint8* src_y,
968                     const uint8* src_u,
969                     const uint8* src_v,
970                     uint8* dst_raw,
971                     int width);
972 void I422ToARGB4444Row_C(const uint8* src_y,
973                          const uint8* src_u,
974                          const uint8* src_v,
975                          uint8* dst_argb4444,
976                          int width);
// NOTE(review): the destination below is named `dst_argb4444` on an ARGB1555
// prototype - looks like a copy/paste leftover from I422ToARGB4444Row_C.
977 void I422ToARGB1555Row_C(const uint8* src_y,
978                          const uint8* src_u,
979                          const uint8* src_v,
980                          uint8* dst_argb4444,
981                          int width);
982 void I422ToRGB565Row_C(const uint8* src_y,
983                        const uint8* src_u,
984                        const uint8* src_v,
985                        uint8* dst_rgb565,
986                        int width);
987 void YToARGBRow_C(const uint8* src_y,
988                   uint8* dst_argb,
989                   int width);
// YUV-to-RGB row prototypes with x86 suffixes: one AVX2 prototype
// (I422ToARGBRow_AVX2) followed by the SSSE3-suffixed family. Parameter
// shapes mirror the _C prototypes of the same base names.
990 void I422ToARGBRow_AVX2(const uint8* src_y,
991                         const uint8* src_u,
992                         const uint8* src_v,
993                         uint8* dst_argb,
994                         int width);
995 void I444ToARGBRow_SSSE3(const uint8* src_y,
996                          const uint8* src_u,
997                          const uint8* src_v,
998                          uint8* dst_argb,
999                          int width);
1000 void I422ToARGBRow_SSSE3(const uint8* src_y,
1001                          const uint8* src_u,
1002                          const uint8* src_v,
1003                          uint8* dst_argb,
1004                          int width);
1005 void I411ToARGBRow_SSSE3(const uint8* src_y,
1006                          const uint8* src_u,
1007                          const uint8* src_v,
1008                          uint8* dst_argb,
1009                          int width);
1010 void NV12ToARGBRow_SSSE3(const uint8* src_y,
1011                          const uint8* src_uv,
1012                          uint8* dst_argb,
1013                          int width);
1014 void NV21ToARGBRow_SSSE3(const uint8* src_y,
1015                          const uint8* src_vu,
1016                          uint8* dst_argb,
1017                          int width);
1018 void NV12ToRGB565Row_SSSE3(const uint8* src_y,
1019                            const uint8* src_uv,
1020                            uint8* dst_argb,
1021                            int width);
1022 void NV21ToRGB565Row_SSSE3(const uint8* src_y,
1023                            const uint8* src_vu,
1024                            uint8* dst_argb,
1025                            int width);
1026 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
1027                          uint8* dst_argb,
1028                          int width);
1029 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
1030                          uint8* dst_argb,
1031                          int width);
1032 void I422ToBGRARow_SSSE3(const uint8* src_y,
1033                          const uint8* src_u,
1034                          const uint8* src_v,
1035                          uint8* dst_bgra,
1036                          int width);
1037 void I422ToABGRRow_SSSE3(const uint8* src_y,
1038                          const uint8* src_u,
1039                          const uint8* src_v,
1040                          uint8* dst_abgr,
1041                          int width);
1042 void I422ToRGBARow_SSSE3(const uint8* src_y,
1043                          const uint8* src_u,
1044                          const uint8* src_v,
1045                          uint8* dst_rgba,
1046                          int width);
// NOTE(review): the ARGB4444 / ARGB1555 / RGB565 prototypes below (and the
// NV12/NV21 RGB565 ones above) name the destination `dst_argb`, unlike the
// format-specific names on the corresponding _C prototypes.
1047 void I422ToARGB4444Row_SSSE3(const uint8* src_y,
1048                              const uint8* src_u,
1049                              const uint8* src_v,
1050                              uint8* dst_argb,
1051                              int width);
1052 void I422ToARGB1555Row_SSSE3(const uint8* src_y,
1053                              const uint8* src_u,
1054                              const uint8* src_v,
1055                              uint8* dst_argb,
1056                              int width);
1057 void I422ToRGB565Row_SSSE3(const uint8* src_y,
1058                            const uint8* src_u,
1059                            const uint8* src_v,
1060                            uint8* dst_argb,
1061                            int width);
1062 // RGB24/RAW are unaligned.
1063 void I422ToRGB24Row_SSSE3(const uint8* src_y,
1064                           const uint8* src_u,
1065                           const uint8* src_v,
1066                           uint8* dst_rgb24,
1067                           int width);
1068 void I422ToRAWRow_SSSE3(const uint8* src_y,
1069                         const uint8* src_u,
1070                         const uint8* src_v,
1071                         uint8* dst_raw,
1072                         int width);
1073 
// _Unaligned_SSSE3 prototypes for the ARGB/BGRA/ABGR/RGBA outputs. Signatures
// match the plain _SSSE3 prototypes of the same base names.
// NOTE(review): presumably these drop a pointer-alignment requirement that
// the plain _SSSE3 versions have - the header does not say so explicitly.
1074 void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1075                                    const uint8* src_u,
1076                                    const uint8* src_v,
1077                                    uint8* dst_argb,
1078                                    int width);
1079 void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1080                                    const uint8* src_u,
1081                                    const uint8* src_v,
1082                                    uint8* dst_argb,
1083                                    int width);
1084 void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1085                                    const uint8* src_u,
1086                                    const uint8* src_v,
1087                                    uint8* dst_argb,
1088                                    int width);
1089 void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1090                                    const uint8* src_uv,
1091                                    uint8* dst_argb,
1092                                    int width);
1093 void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1094                                    const uint8* src_vu,
1095                                    uint8* dst_argb,
1096                                    int width);
1097 void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
1098                                    uint8* dst_argb,
1099                                    int width);
1100 void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
1101                                    uint8* dst_argb,
1102                                    int width);
1103 void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y,
1104                                    const uint8* src_u,
1105                                    const uint8* src_v,
1106                                    uint8* dst_bgra,
1107                                    int width);
1108 void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y,
1109                                    const uint8* src_u,
1110                                    const uint8* src_v,
1111                                    uint8* dst_abgr,
1112                                    int width);
1113 void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y,
1114                                    const uint8* src_u,
1115                                    const uint8* src_v,
1116                                    uint8* dst_rgba,
1117                                    int width);
// _Any_ prototypes for the x86 YUV-to-RGB rows: one _Any_AVX2 prototype
// followed by the _Any_SSSE3 family. Signatures match the non-_Any_
// prototypes of the same base names.
// NOTE(review): presumably the _Any_ forms accept widths the plain SIMD
// versions cannot - the header does not state this; confirm in the sources.
1118 void I422ToARGBRow_Any_AVX2(const uint8* src_y,
1119                             const uint8* src_u,
1120                             const uint8* src_v,
1121                             uint8* dst_argb,
1122                             int width);
1123 void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
1124                              const uint8* src_u,
1125                              const uint8* src_v,
1126                              uint8* dst_argb,
1127                              int width);
1128 void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
1129                              const uint8* src_u,
1130                              const uint8* src_v,
1131                              uint8* dst_argb,
1132                              int width);
1133 void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
1134                              const uint8* src_u,
1135                              const uint8* src_v,
1136                              uint8* dst_argb,
1137                              int width);
1138 void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
1139                              const uint8* src_uv,
1140                              uint8* dst_argb,
1141                              int width);
1142 void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
1143                              const uint8* src_vu,
1144                              uint8* dst_argb,
1145                              int width);
1146 void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
1147                                const uint8* src_uv,
1148                                uint8* dst_argb,
1149                                int width);
1150 void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
1151                                const uint8* src_vu,
1152                                uint8* dst_argb,
1153                                int width);
1154 void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
1155                              uint8* dst_argb,
1156                              int width);
1157 void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
1158                              uint8* dst_argb,
1159                              int width);
1160 void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
1161                              const uint8* src_u,
1162                              const uint8* src_v,
1163                              uint8* dst_bgra,
1164                              int width);
1165 void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
1166                              const uint8* src_u,
1167                              const uint8* src_v,
1168                              uint8* dst_abgr,
1169                              int width);
1170 void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
1171                              const uint8* src_u,
1172                              const uint8* src_v,
1173                              uint8* dst_rgba,
1174                              int width);
// NOTE(review): the ARGB4444 / ARGB1555 / RGB565 prototypes below name the
// destination `dst_rgba`, and the RGB24 / RAW ones name it `dst_argb`; these
// look like leftovers from neighbouring prototypes.
1175 void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
1176                                  const uint8* src_u,
1177                                  const uint8* src_v,
1178                                  uint8* dst_rgba,
1179                                  int width);
1180 void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
1181                                  const uint8* src_u,
1182                                  const uint8* src_v,
1183                                  uint8* dst_rgba,
1184                                  int width);
1185 void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
1186                                const uint8* src_u,
1187                                const uint8* src_v,
1188                                uint8* dst_rgba,
1189                                int width);
1190 // RGB24/RAW are unaligned.
1191 void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
1192                               const uint8* src_u,
1193                               const uint8* src_v,
1194                               uint8* dst_argb,
1195                               int width);
1196 void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
1197                             const uint8* src_u,
1198                             const uint8* src_v,
1199                             uint8* dst_argb,
1200                             int width);
// YToARGBRow prototypes (SSE2, NEON, _Any_SSE2, _Any_NEON): a single const
// src_y input, a dst_argb output and a width.
1201 void YToARGBRow_SSE2(const uint8* src_y,
1202                      uint8* dst_argb,
1203                      int width);
1204 void YToARGBRow_NEON(const uint8* src_y,
1205                      uint8* dst_argb,
1206                      int width);
1207 void YToARGBRow_Any_SSE2(const uint8* src_y,
1208                          uint8* dst_argb,
1209                          int width);
1210 void YToARGBRow_Any_NEON(const uint8* src_y,
1211                          uint8* dst_argb,
1212                          int width);
1213 
1214 // ARGB preattenuated alpha blend.
// Two const ARGB inputs (src_argb, src_argb1), one dst_argb output and a
// width. Declared with SSSE3, SSE2, NEON and C suffixes; no _Any_ forms are
// declared in this group.
1215 void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
1216                         uint8* dst_argb, int width);
1217 void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1218                        uint8* dst_argb, int width);
1219 void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1220                        uint8* dst_argb, int width);
1221 void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
1222                     uint8* dst_argb, int width);
1223 
1224 // ARGB multiply images. Same API as Blend, but these require
1225 // pointer and width alignment for SSE2.
// Declared with C, SSE2, AVX2 and NEON suffixes, each SIMD suffix also having
// an _Any_ form with the same (src_argb, src_argb1, dst_argb, width)
// signature.
1226 void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
1227                        uint8* dst_argb, int width);
1228 void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1229                           uint8* dst_argb, int width);
1230 void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1231                               uint8* dst_argb, int width);
1232 void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1233                           uint8* dst_argb, int width);
1234 void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1235                               uint8* dst_argb, int width);
1236 void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1237                           uint8* dst_argb, int width);
1238 void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1239                               uint8* dst_argb, int width);
1240 
1241 // ARGB add images.
// Same (src_argb, src_argb1, dst_argb, width) signature as the multiply and
// subtract prototypes. Declared with C, SSE2, AVX2 and NEON suffixes, each
// SIMD suffix also having an _Any_ form.
// NOTE(review): unlike the multiply/subtract groups, no alignment requirement
// is documented here - confirm whether one applies.
1242 void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
1243                   uint8* dst_argb, int width);
1244 void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1245                      uint8* dst_argb, int width);
1246 void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1247                          uint8* dst_argb, int width);
1248 void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1249                      uint8* dst_argb, int width);
1250 void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1251                          uint8* dst_argb, int width);
1252 void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1253                      uint8* dst_argb, int width);
1254 void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1255                          uint8* dst_argb, int width);
1256 
1257 // ARGB subtract images. Same API as Blend, but these require
1258 // pointer and width alignment for SSE2.
// Declared with C, SSE2, AVX2 and NEON suffixes, each SIMD suffix also having
// an _Any_ form with the same (src_argb, src_argb1, dst_argb, width)
// signature.
// NOTE(review): which operand is subtracted from which is not stated in this
// header - confirm against the definitions.
1259 void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
1260                        uint8* dst_argb, int width);
1261 void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1262                           uint8* dst_argb, int width);
1263 void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1264                               uint8* dst_argb, int width);
1265 void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1266                           uint8* dst_argb, int width);
1267 void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1268                               uint8* dst_argb, int width);
1269 void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1270                           uint8* dst_argb, int width);
1271 void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1272                               uint8* dst_argb, int width);
1273 
// _Any_ forms of the x86 ARGBTo*Row prototypes (SSSE3 for RGB24/RAW, SSE2 for
// RGB565/ARGB1555/ARGB4444): (const src_argb, dst_rgb, pix).
1274 void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
1275 void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
1276 void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
1277 void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
1278 void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
1279 
// _Any_NEON forms of the ARGBTo*Row prototypes: (const src_argb, dst_rgb, pix).
1280 void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1281 void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1282 void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1283 void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1284 void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1285 
// _Any_NEON prototypes for the three-plane (src_y, src_u, src_v) YUV-to-RGB
// rows.
// NOTE(review): every prototype in this group names the destination
// `dst_argb`, including the BGRA / ABGR / RGBA / RGB24 / RAW outputs, whereas
// the _C prototypes use format-specific names (dst_bgra, dst_rgb24, ...).
// Prototype parameter names do not affect callers, but the mismatch may
// mislead readers.
1286 void I444ToARGBRow_Any_NEON(const uint8* src_y,
1287                             const uint8* src_u,
1288                             const uint8* src_v,
1289                             uint8* dst_argb,
1290                             int width);
1291 void I422ToARGBRow_Any_NEON(const uint8* src_y,
1292                             const uint8* src_u,
1293                             const uint8* src_v,
1294                             uint8* dst_argb,
1295                             int width);
1296 void I411ToARGBRow_Any_NEON(const uint8* src_y,
1297                             const uint8* src_u,
1298                             const uint8* src_v,
1299                             uint8* dst_argb,
1300                             int width);
1301 void I422ToBGRARow_Any_NEON(const uint8* src_y,
1302                             const uint8* src_u,
1303                             const uint8* src_v,
1304                             uint8* dst_argb,
1305                             int width);
1306 void I422ToABGRRow_Any_NEON(const uint8* src_y,
1307                             const uint8* src_u,
1308                             const uint8* src_v,
1309                             uint8* dst_argb,
1310                             int width);
1311 void I422ToRGBARow_Any_NEON(const uint8* src_y,
1312                             const uint8* src_u,
1313                             const uint8* src_v,
1314                             uint8* dst_argb,
1315                             int width);
1316 void I422ToRGB24Row_Any_NEON(const uint8* src_y,
1317                              const uint8* src_u,
1318                              const uint8* src_v,
1319                              uint8* dst_argb,
1320                              int width);
1321 void I422ToRAWRow_Any_NEON(const uint8* src_y,
1322                            const uint8* src_u,
1323                            const uint8* src_v,
1324                            uint8* dst_argb,
1325                            int width);
1326 void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
1327                                 const uint8* src_u,
1328                                 const uint8* src_v,
1329                                 uint8* dst_argb,
1330                                 int width);
1331 void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
1332                                 const uint8* src_u,
1333                                 const uint8* src_v,
1334                                 uint8* dst_argb,
1335                                 int width);
1336 void I422ToRGB565Row_Any_NEON(const uint8* src_y,
1337                               const uint8* src_u,
1338                               const uint8* src_v,
1339                               uint8* dst_argb,
1340                               int width);
1341 void NV12ToARGBRow_Any_NEON(const uint8* src_y,
1342                             const uint8* src_uv,
1343                             uint8* dst_argb,
1344                             int width);
1345 void NV21ToARGBRow_Any_NEON(const uint8* src_y,
1346                             const uint8* src_uv,
1347                             uint8* dst_argb,
1348                             int width);
1349 void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
1350                               const uint8* src_uv,
1351                               uint8* dst_argb,
1352                               int width);
1353 void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
1354                               const uint8* src_uv,
1355                               uint8* dst_argb,
1356                               int width);
1357 void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
1358                             uint8* dst_argb,
1359                             int width);
1360 void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
1361                             uint8* dst_argb,
1362                             int width);
1363 void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
1364                               const uint8* src_u,
1365                               const uint8* src_v,
1366                               uint8* dst_argb,
1367                               int width);
1368 void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
1369                               const uint8* src_u,
1370                               const uint8* src_v,
1371                               uint8* dst_argb,
1372                               int width);
1373 void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
1374                               const uint8* src_u,
1375                               const uint8* src_v,
1376                               uint8* dst_argb,
1377                               int width);
1378 void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
1379                               const uint8* src_u,
1380                               const uint8* src_v,
1381                               uint8* dst_argb,
1382                               int width);
1383 void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
1384                               const uint8* src_u,
1385                               const uint8* src_v,
1386                               uint8* dst_argb,
1387                               int width);
1388 void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
1389                               const uint8* src_u,
1390                               const uint8* src_v,
1391                               uint8* dst_argb,
1392                               int width);
1393 
1394 void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
1395 void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
1396                       uint8* dst_u, uint8* dst_v, int pix);
1397 void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
1398                          uint8* dst_u, uint8* dst_v, int pix);
1399 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
1400 void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
1401                       uint8* dst_u, uint8* dst_v, int pix);
1402 void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
1403                          uint8* dst_u, uint8* dst_v, int pix);
1404 void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
1405                                uint8* dst_y, int pix);
1406 void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
1407                                 uint8* dst_u, uint8* dst_v, int pix);
1408 void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
1409                                    uint8* dst_u, uint8* dst_v, int pix);
1410 void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
1411 void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
1412                       uint8* dst_u, uint8* dst_v, int pix);
1413 void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
1414                          uint8* dst_u, uint8* dst_v, int pix);
1415 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
1416 void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
1417                    uint8* dst_u, uint8* dst_v, int pix);
1418 void YUY2ToUV422Row_C(const uint8* src_yuy2,
1419                       uint8* dst_u, uint8* dst_v, int pix);
1420 void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
1421 void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
1422                           uint8* dst_u, uint8* dst_v, int pix);
1423 void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
1424                              uint8* dst_u, uint8* dst_v, int pix);
1425 void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
1426 void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
1427                           uint8* dst_u, uint8* dst_v, int pix);
1428 void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
1429                              uint8* dst_u, uint8* dst_v, int pix);
1430 void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
1431 void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
1432                           uint8* dst_u, uint8* dst_v, int pix);
1433 void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
1434                              uint8* dst_u, uint8* dst_v, int pix);
1435 void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
1436 void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
1437                       uint8* dst_u, uint8* dst_v, int pix);
1438 void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
1439                          uint8* dst_u, uint8* dst_v, int pix);
1440 void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
1441 void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
1442                       uint8* dst_u, uint8* dst_v, int pix);
1443 void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
1444                          uint8* dst_u, uint8* dst_v, int pix);
1445 void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
1446                                uint8* dst_y, int pix);
1447 void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
1448                                 uint8* dst_u, uint8* dst_v, int pix);
1449 void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
1450                                    uint8* dst_u, uint8* dst_v, int pix);
1451 void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
1452 void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
1453                       uint8* dst_u, uint8* dst_v, int pix);
1454 void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
1455                          uint8* dst_u, uint8* dst_v, int pix);
1456 void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
1457 void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
1458                       uint8* dst_u, uint8* dst_v, int pix);
1459 void UYVYToUV422Row_NEON(const uint8* src_uyvy,
1460                          uint8* dst_u, uint8* dst_v, int pix);
1461 
1462 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
1463 void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
1464                    uint8* dst_u, uint8* dst_v, int pix);
1465 void UYVYToUV422Row_C(const uint8* src_uyvy,
1466                       uint8* dst_u, uint8* dst_v, int pix);
1467 void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
1468 void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
1469                           uint8* dst_u, uint8* dst_v, int pix);
1470 void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
1471                              uint8* dst_u, uint8* dst_v, int pix);
1472 void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
1473 void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
1474                           uint8* dst_u, uint8* dst_v, int pix);
1475 void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
1476                              uint8* dst_u, uint8* dst_v, int pix);
1477 void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
1478 void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
1479                           uint8* dst_u, uint8* dst_v, int pix);
1480 void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
1481                              uint8* dst_u, uint8* dst_v, int pix);
1482 
1483 void HalfRow_C(const uint8* src_uv, int src_uv_stride,
1484                uint8* dst_uv, int pix);
1485 void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
1486                   uint8* dst_uv, int pix);
1487 void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
1488                   uint8* dst_uv, int pix);
1489 void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
1490                   uint8* dst_uv, int pix);
1491 
1492 void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
1493                   uint16* dst_uv, int pix);
1494 
1495 void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
1496                       uint32 selector, int pix);
1497 void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
1498                           uint32 selector, int pix);
1499 void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
1500                          uint32 selector, int pix);
1501 void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer,
1502                               uint32 selector, int pix);
1503 void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
1504                              uint32 selector, int pix);
1505 void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
1506                         uint32 /* selector */, int pix);
1507 void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
1508                            uint32 /* selector */, int pix);
1509 void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
1510                            uint32 /* selector */, int pix);
1511 void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
1512                                uint32 /* selector */, int pix);
1513 void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
1514                                uint32 /* selector */, int pix);
1515 
1516 void I422ToYUY2Row_C(const uint8* src_y,
1517                      const uint8* src_u,
1518                      const uint8* src_v,
1519                      uint8* dst_yuy2, int width);
1520 void I422ToUYVYRow_C(const uint8* src_y,
1521                      const uint8* src_u,
1522                      const uint8* src_v,
1523                      uint8* dst_uyvy, int width);
1524 void I422ToYUY2Row_SSE2(const uint8* src_y,
1525                         const uint8* src_u,
1526                         const uint8* src_v,
1527                         uint8* dst_yuy2, int width);
1528 void I422ToUYVYRow_SSE2(const uint8* src_y,
1529                         const uint8* src_u,
1530                         const uint8* src_v,
1531                         uint8* dst_uyvy, int width);
1532 void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
1533                             const uint8* src_u,
1534                             const uint8* src_v,
1535                             uint8* dst_yuy2, int width);
1536 void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
1537                             const uint8* src_u,
1538                             const uint8* src_v,
1539                             uint8* dst_uyvy, int width);
1540 void I422ToYUY2Row_NEON(const uint8* src_y,
1541                         const uint8* src_u,
1542                         const uint8* src_v,
1543                         uint8* dst_yuy2, int width);
1544 void I422ToUYVYRow_NEON(const uint8* src_y,
1545                         const uint8* src_u,
1546                         const uint8* src_v,
1547                         uint8* dst_uyvy, int width);
1548 void I422ToYUY2Row_Any_NEON(const uint8* src_y,
1549                             const uint8* src_u,
1550                             const uint8* src_v,
1551                             uint8* dst_yuy2, int width);
1552 void I422ToUYVYRow_Any_NEON(const uint8* src_y,
1553                             const uint8* src_u,
1554                             const uint8* src_v,
1555                             uint8* dst_uyvy, int width);
1556 
1557 // Effects related row functions.
1558 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1559 void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
1560 void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
1561 void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
1562 void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
1563 void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
1564                                int width);
1565 void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
1566                                 int width);
1567 void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
1568                                int width);
1569 void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
1570                                int width);
1571 
1572 // Inverse table for unattenuate, shared by C and SSE2.
1573 extern const uint32 fixed_invtbl8[256];
1574 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1575 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
1576 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
1577 void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
1578                                  int width);
1579 void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
1580                                  int width);
1581 
1582 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1583 void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
1584 void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
1585 
1586 void ARGBSepiaRow_C(uint8* dst_argb, int width);
1587 void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
1588 void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
1589 
1590 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
1591                           const int8* matrix_argb, int width);
1592 void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
1593                               const int8* matrix_argb, int width);
1594 void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
1595                              const int8* matrix_argb, int width);
1596 
1597 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
1598 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
1599 
1600 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
1601 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
1602 
1603 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
1604                        int interval_offset, int width);
1605 void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
1606                           int interval_offset, int width);
1607 void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
1608                           int interval_offset, int width);
1609 
1610 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
1611                     uint32 value);
1612 void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
1613                        uint32 value);
1614 void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
1615                        uint32 value);
1616 
1617 // Used for blur.
1618 void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
1619                                     int width, int area, uint8* dst, int count);
1620 void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
1621                                   const int32* previous_cumsum, int width);
1622 
1623 void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
1624                                  int width, int area, uint8* dst, int count);
1625 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1626                                const int32* previous_cumsum, int width);
1627 
1628 LIBYUV_API
1629 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
1630                      uint8* dst_argb, const float* uv_dudv, int width);
1631 LIBYUV_API
1632 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
1633                         uint8* dst_argb, const float* uv_dudv, int width);
1634 
1635 // Used for I420Scale, ARGBScale, and ARGBInterpolate.
1636 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
1637                       ptrdiff_t src_stride_ptr,
1638                       int width, int source_y_fraction);
1639 void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
1640                          ptrdiff_t src_stride_ptr, int width,
1641                          int source_y_fraction);
1642 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1643                           ptrdiff_t src_stride_ptr, int width,
1644                           int source_y_fraction);
1645 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
1646                          ptrdiff_t src_stride_ptr, int width,
1647                          int source_y_fraction);
1648 void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
1649                          ptrdiff_t src_stride_ptr, int width,
1650                          int source_y_fraction);
1651 void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
1652                                 ptrdiff_t src_stride_ptr, int width,
1653                                 int source_y_fraction);
1654 void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
1655                                    ptrdiff_t src_stride_ptr, int width,
1656                                    int source_y_fraction);
1657 void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1658                                     ptrdiff_t src_stride_ptr, int width,
1659                                     int source_y_fraction);
1660 void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
1661                              ptrdiff_t src_stride_ptr, int width,
1662                              int source_y_fraction);
1663 void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
1664                              ptrdiff_t src_stride_ptr, int width,
1665                              int source_y_fraction);
1666 void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1667                               ptrdiff_t src_stride_ptr, int width,
1668                               int source_y_fraction);
1669 void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
1670                              ptrdiff_t src_stride_ptr, int width,
1671                              int source_y_fraction);
1672 void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
1673                                     ptrdiff_t src_stride_ptr, int width,
1674                                     int source_y_fraction);
1675 
1676 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
1677                          ptrdiff_t src_stride_ptr,
1678                          int width, int source_y_fraction);
1679 
1680 // Sobel images.
1681 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
1682                  uint8* dst_sobelx, int width);
1683 void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
1684                     const uint8* src_y2, uint8* dst_sobelx, int width);
1685 void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
1686                     const uint8* src_y2, uint8* dst_sobelx, int width);
1687 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
1688                  uint8* dst_sobely, int width);
1689 void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
1690                     uint8* dst_sobely, int width);
1691 void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
1692                     uint8* dst_sobely, int width);
1693 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1694                 uint8* dst_argb, int width);
1695 void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1696                    uint8* dst_argb, int width);
1697 void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1698                    uint8* dst_argb, int width);
1699 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1700                        uint8* dst_y, int width);
1701 void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1702                           uint8* dst_y, int width);
1703 void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1704                           uint8* dst_y, int width);
1705 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1706                   uint8* dst_argb, int width);
1707 void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1708                      uint8* dst_argb, int width);
1709 void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1710                      uint8* dst_argb, int width);
1711 
1712 void ARGBPolynomialRow_C(const uint8* src_argb,
1713                          uint8* dst_argb, const float* poly,
1714                          int width);
1715 void ARGBPolynomialRow_SSE2(const uint8* src_argb,
1716                             uint8* dst_argb, const float* poly,
1717                             int width);
1718 void ARGBPolynomialRow_AVX2(const uint8* src_argb,
1719                             uint8* dst_argb, const float* poly,
1720                             int width);
1721 
1722 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
1723                              const uint8* luma, uint32 lumacoeff);
1724 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
1725                                  int width,
1726                                  const uint8* luma, uint32 lumacoeff);
1727 
1728 #ifdef __cplusplus
1729 }  // extern "C"
1730 }  // namespace libyuv
1731 #endif
1732 
1733 #endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
1734