1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef INCLUDE_LIBYUV_ROW_H_ // NOLINT 12 #define INCLUDE_LIBYUV_ROW_H_ 13 14 #include <stdlib.h> // For malloc. 15 16 #include "libyuv/basic_types.h" 17 18 #if defined(__native_client__) 19 #include "ppapi/c/pp_macros.h" // For PPAPI_RELEASE 20 #endif 21 22 #ifdef __cplusplus 23 namespace libyuv { 24 extern "C" { 25 #endif 26 27 #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) 28 29 #ifdef __cplusplus 30 #define align_buffer_64(var, size) \ 31 uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63)); \ 32 uint8* var = reinterpret_cast<uint8*> \ 33 ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63) 34 #else 35 #define align_buffer_64(var, size) \ 36 uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \ 37 uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ 38 #endif 39 40 #define free_aligned_buffer_64(var) \ 41 free(var##_mem); \ 42 var = 0 43 44 #if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ 45 defined(TARGET_IPHONE_SIMULATOR) || \ 46 (defined(_MSC_VER) && defined(__clang__)) 47 #define LIBYUV_DISABLE_X86 48 #endif 49 // True if compiling for SSSE3 as a requirement. 50 #if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) 51 #define LIBYUV_SSSE3_ONLY 52 #endif 53 54 // Enable for NaCL pepper 33 for bundle and AVX2 support. 
#if defined(__native_client__) && PPAPI_RELEASE >= 33
#define NEW_BINUTILS
#endif
// NEON is disabled for NaCl ARM builds older than pepper 37.
#if defined(__native_client__) && defined(__arm__) && PPAPI_RELEASE < 37
#define LIBYUV_DISABLE_NEON
#endif

// The following are available on all x86 platforms:
// Each HAS_<ROW>_<ISA> macro names one row-function variant; presumably
// callers test these to select the matching implementation - confirm against
// the users of this header.
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Effects:
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBCOPYALPHAROW_SSE2
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSSE3
#define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#define HAS_INTERPOLATEROW_SSE2
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_SOBELROW_SSE2
#define HAS_SOBELTOPLANEROW_SSE2
#define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2

// Conversions:
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBSHUFFLEROW_SSE2
#define HAS_ARGBSHUFFLEROW_SSSE3
#define HAS_ARGBTOARGB1555ROW_SSE2
#define HAS_ARGBTOARGB4444ROW_SSE2
#define HAS_ARGBTOBAYERGGROW_SSE2
#define HAS_ARGBTOBAYERROW_SSSE3
#define HAS_ARGBTORAWROW_SSSE3
#define HAS_ARGBTORGB24ROW_SSSE3
#define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTOUV422ROW_SSSE3
#define HAS_ARGBTOUV444ROW_SSSE3
#define HAS_ARGBTOUVJROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_COPYROW_X86
#define HAS_HALFROW_SSE2
#define HAS_I400TOARGBROW_SSE2
#define HAS_I411TOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
#define HAS_I422TORAWROW_SSSE3
#define HAS_I422TORGB24ROW_SSSE3
#define HAS_I422TORGB565ROW_SSSE3
#define HAS_I422TORGBAROW_SSSE3
#define HAS_I422TOUYVYROW_SSE2
#define HAS_I422TOYUY2ROW_SSE2
#define HAS_I444TOARGBROW_SSSE3
#define HAS_MERGEUVROW_SSE2
#define HAS_MIRRORROW_SSE2
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROW_UV_SSSE3
#define HAS_MIRRORUVROW_SSSE3
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV12TORGB565ROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_NV21TORGB565ROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_RGBATOUVROW_SSSE3
#define HAS_RGBATOYROW_SSSE3
#define HAS_SETROW_X86
#define HAS_SPLITUVROW_SSE2
#define HAS_UYVYTOARGBROW_SSSE3
#define HAS_UYVYTOUV422ROW_SSE2
#define HAS_UYVYTOUVROW_SSE2
#define HAS_UYVYTOYROW_SSE2
#define HAS_YTOARGBROW_SSE2
#define HAS_YUY2TOARGBROW_SSSE3
#define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2
#endif

// The following are available on x64 Visual C:
// (_M_X64 is not part of the architecture test above, so this is the only
// HAS_* macro this section of the header enables for x64 Visual C.)
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_X64)
#define HAS_I422TOARGBROW_SSSE3
#endif

// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif  // GNUC >= 4.7
#endif  // __GNUC__

// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif  // clang >= 3.4
#endif  // __clang__

// Visual C 2012 required for AVX2.
// Only set for 32-bit x86 (_M_IX86); _M_X64 is not tested here.
#if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif  // VisualStudio >= 2012

// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
    defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
// Effects:
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBCOPYALPHAROW_AVX2
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
#endif

// The following require VS2012.
// TODO(fbarchard): Port to gcc.
203 #if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2) 204 #define HAS_ARGBTOUVROW_AVX2 205 #define HAS_ARGBTOYJROW_AVX2 206 #define HAS_ARGBTOYROW_AVX2 207 #define HAS_HALFROW_AVX2 208 #define HAS_I422TOARGBROW_AVX2 209 #define HAS_INTERPOLATEROW_AVX2 210 #define HAS_MERGEUVROW_AVX2 211 #define HAS_MIRRORROW_AVX2 212 #define HAS_SPLITUVROW_AVX2 213 #define HAS_UYVYTOUV422ROW_AVX2 214 #define HAS_UYVYTOUVROW_AVX2 215 #define HAS_UYVYTOYROW_AVX2 216 #define HAS_YUY2TOUV422ROW_AVX2 217 #define HAS_YUY2TOUVROW_AVX2 218 #define HAS_YUY2TOYROW_AVX2 219 220 // Effects: 221 #define HAS_ARGBADDROW_AVX2 222 #define HAS_ARGBATTENUATEROW_AVX2 223 #define HAS_ARGBMIRRORROW_AVX2 224 #define HAS_ARGBMULTIPLYROW_AVX2 225 #define HAS_ARGBSUBTRACTROW_AVX2 226 #define HAS_ARGBUNATTENUATEROW_AVX2 227 #endif // defined(VISUALC_HAS_AVX2) 228 229 // The following are Yasm x86 only: 230 // TODO(fbarchard): Port AVX2 to inline. 231 #if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM) 232 (defined(_M_IX86) || defined(_M_X64) || \ 233 defined(__x86_64__) || defined(__i386__)) 234 #define HAS_MERGEUVROW_AVX2 235 #define HAS_MERGEUVROW_MMX 236 #define HAS_SPLITUVROW_AVX2 237 #define HAS_SPLITUVROW_MMX 238 #define HAS_UYVYTOYROW_AVX2 239 #define HAS_UYVYTOYROW_MMX 240 #define HAS_YUY2TOYROW_AVX2 241 #define HAS_YUY2TOYROW_MMX 242 #endif 243 244 // The following are disabled when SSSE3 is available: 245 #if !defined(LIBYUV_DISABLE_X86) && \ 246 (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ 247 !defined(LIBYUV_SSSE3_ONLY) 248 #define HAS_ARGBBLENDROW_SSE2 249 #define HAS_ARGBATTENUATEROW_SSE2 250 #define HAS_MIRRORROW_SSE2 251 #endif 252 253 // The following are available on arm64 platforms: 254 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 255 #endif 256 257 // The following are available on Neon platforms: 258 #if !defined(LIBYUV_DISABLE_NEON) && \ 259 (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) 260 #define HAS_ABGRTOUVROW_NEON 261 #define 
HAS_ABGRTOYROW_NEON 262 #define HAS_ARGB1555TOARGBROW_NEON 263 #define HAS_ARGB1555TOUVROW_NEON 264 #define HAS_ARGB1555TOYROW_NEON 265 #define HAS_ARGB4444TOARGBROW_NEON 266 #define HAS_ARGB4444TOUVROW_NEON 267 #define HAS_ARGB4444TOYROW_NEON 268 #define HAS_ARGBTOARGB1555ROW_NEON 269 #define HAS_ARGBTOARGB4444ROW_NEON 270 #define HAS_ARGBTOBAYERROW_NEON 271 #define HAS_ARGBTOBAYERGGROW_NEON 272 #define HAS_ARGBTORAWROW_NEON 273 #define HAS_ARGBTORGB24ROW_NEON 274 #define HAS_ARGBTORGB565ROW_NEON 275 #define HAS_ARGBTOUV411ROW_NEON 276 #define HAS_ARGBTOUV422ROW_NEON 277 #define HAS_ARGBTOUV444ROW_NEON 278 #define HAS_ARGBTOUVROW_NEON 279 #define HAS_ARGBTOUVJROW_NEON 280 #define HAS_ARGBTOYROW_NEON 281 #define HAS_ARGBTOYJROW_NEON 282 #define HAS_BGRATOUVROW_NEON 283 #define HAS_BGRATOYROW_NEON 284 #define HAS_COPYROW_NEON 285 #define HAS_HALFROW_NEON 286 #define HAS_I400TOARGBROW_NEON 287 #define HAS_I411TOARGBROW_NEON 288 #define HAS_I422TOABGRROW_NEON 289 #define HAS_I422TOARGB1555ROW_NEON 290 #define HAS_I422TOARGB4444ROW_NEON 291 #define HAS_I422TOARGBROW_NEON 292 #define HAS_I422TOBGRAROW_NEON 293 #define HAS_I422TORAWROW_NEON 294 #define HAS_I422TORGB24ROW_NEON 295 #define HAS_I422TORGB565ROW_NEON 296 #define HAS_I422TORGBAROW_NEON 297 #define HAS_I422TOUYVYROW_NEON 298 #define HAS_I422TOYUY2ROW_NEON 299 #define HAS_I444TOARGBROW_NEON 300 #define HAS_MERGEUVROW_NEON 301 #define HAS_MIRRORROW_NEON 302 #define HAS_MIRRORUVROW_NEON 303 #define HAS_NV12TOARGBROW_NEON 304 #define HAS_NV12TORGB565ROW_NEON 305 #define HAS_NV21TOARGBROW_NEON 306 #define HAS_NV21TORGB565ROW_NEON 307 #define HAS_RAWTOARGBROW_NEON 308 #define HAS_RAWTOUVROW_NEON 309 #define HAS_RAWTOYROW_NEON 310 #define HAS_RGB24TOARGBROW_NEON 311 #define HAS_RGB24TOUVROW_NEON 312 #define HAS_RGB24TOYROW_NEON 313 #define HAS_RGB565TOARGBROW_NEON 314 #define HAS_RGB565TOUVROW_NEON 315 #define HAS_RGB565TOYROW_NEON 316 #define HAS_RGBATOUVROW_NEON 317 #define HAS_RGBATOYROW_NEON 318 #define 
HAS_SETROW_NEON 319 #define HAS_SPLITUVROW_NEON 320 #define HAS_UYVYTOARGBROW_NEON 321 #define HAS_UYVYTOUV422ROW_NEON 322 #define HAS_UYVYTOUVROW_NEON 323 #define HAS_UYVYTOYROW_NEON 324 #define HAS_YTOARGBROW_NEON 325 #define HAS_YUY2TOARGBROW_NEON 326 #define HAS_YUY2TOUV422ROW_NEON 327 #define HAS_YUY2TOUVROW_NEON 328 #define HAS_YUY2TOYROW_NEON 329 330 // Effects: 331 #define HAS_ARGBADDROW_NEON 332 #define HAS_ARGBATTENUATEROW_NEON 333 #define HAS_ARGBBLENDROW_NEON 334 #define HAS_ARGBGRAYROW_NEON 335 #define HAS_ARGBMIRRORROW_NEON 336 #define HAS_ARGBMULTIPLYROW_NEON 337 #define HAS_ARGBQUANTIZEROW_NEON 338 #define HAS_ARGBSEPIAROW_NEON 339 #define HAS_ARGBSHADEROW_NEON 340 #define HAS_ARGBSUBTRACTROW_NEON 341 #define HAS_SOBELROW_NEON 342 #define HAS_SOBELTOPLANEROW_NEON 343 #define HAS_SOBELXYROW_NEON 344 #define HAS_SOBELXROW_NEON 345 #define HAS_SOBELYROW_NEON 346 #define HAS_INTERPOLATEROW_NEON 347 // TODO(fbarchard): Investigate neon unittest failure. 348 // #define HAS_ARGBCOLORMATRIXROW_NEON 349 #endif 350 351 // The following are available on Mips platforms: 352 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \ 353 (_MIPS_SIM == _MIPS_SIM_ABI32) 354 #define HAS_COPYROW_MIPS 355 #if defined(__mips_dsp) && (__mips_dsp_rev >= 2) 356 #define HAS_I422TOABGRROW_MIPS_DSPR2 357 #define HAS_I422TOARGBROW_MIPS_DSPR2 358 #define HAS_I422TOBGRAROW_MIPS_DSPR2 359 #define HAS_INTERPOLATEROWS_MIPS_DSPR2 360 #define HAS_MIRRORROW_MIPS_DSPR2 361 #define HAS_MIRRORUVROW_MIPS_DSPR2 362 #define HAS_SPLITUVROW_MIPS_DSPR2 363 #endif 364 #endif 365 366 #if defined(_MSC_VER) && !defined(__CLR_VER) 367 #define SIMD_ALIGNED(var) __declspec(align(16)) var 368 typedef __declspec(align(16)) int16 vec16[8]; 369 typedef __declspec(align(16)) int32 vec32[4]; 370 typedef __declspec(align(16)) int8 vec8[16]; 371 typedef __declspec(align(16)) uint16 uvec16[8]; 372 typedef __declspec(align(16)) uint32 uvec32[4]; 373 typedef __declspec(align(16)) uint8 uvec8[16]; 374 typedef 
__declspec(align(32)) int16 lvec16[16]; 375 typedef __declspec(align(32)) int32 lvec32[8]; 376 typedef __declspec(align(32)) int8 lvec8[32]; 377 typedef __declspec(align(32)) uint16 ulvec16[16]; 378 typedef __declspec(align(32)) uint32 ulvec32[8]; 379 typedef __declspec(align(32)) uint8 ulvec8[32]; 380 381 #elif defined(__GNUC__) 382 // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const. 383 #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) 384 typedef int16 __attribute__((vector_size(16))) vec16; 385 typedef int32 __attribute__((vector_size(16))) vec32; 386 typedef int8 __attribute__((vector_size(16))) vec8; 387 typedef uint16 __attribute__((vector_size(16))) uvec16; 388 typedef uint32 __attribute__((vector_size(16))) uvec32; 389 typedef uint8 __attribute__((vector_size(16))) uvec8; 390 #else 391 #define SIMD_ALIGNED(var) var 392 typedef int16 vec16[8]; 393 typedef int32 vec32[4]; 394 typedef int8 vec8[16]; 395 typedef uint16 uvec16[8]; 396 typedef uint32 uvec32[4]; 397 typedef uint8 uvec8[16]; 398 #endif 399 400 #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) 401 #define OMITFP 402 #else 403 #define OMITFP __attribute__((optimize("omit-frame-pointer"))) 404 #endif 405 406 // NaCL macros for GCC x86 and x64. 407 408 // TODO(nfullagar): When pepper_33 toolchain is distributed, default to 409 // NEW_BINUTILS and remove all BUNDLEALIGN occurances. 
410 #if defined(__native_client__) 411 #define LABELALIGN ".p2align 5\n" 412 #else 413 #define LABELALIGN ".p2align 2\n" 414 #endif 415 #if defined(__native_client__) && defined(__x86_64__) 416 #if defined(NEW_BINUTILS) 417 #define BUNDLELOCK ".bundle_lock\n" 418 #define BUNDLEUNLOCK ".bundle_unlock\n" 419 #define BUNDLEALIGN "\n" 420 #else 421 #define BUNDLELOCK "\n" 422 #define BUNDLEUNLOCK "\n" 423 #define BUNDLEALIGN ".p2align 5\n" 424 #endif 425 #define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" 426 #define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")" 427 #define MEMLEA(offset, base) #offset "(%q" #base ")" 428 #define MEMLEA3(offset, index, scale) \ 429 #offset "(,%q" #index "," #scale ")" 430 #define MEMLEA4(offset, base, index, scale) \ 431 #offset "(%q" #base ",%q" #index "," #scale ")" 432 #define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15" 433 #define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15" 434 #define MEMOPREG(opcode, offset, base, index, scale, reg) \ 435 BUNDLELOCK \ 436 "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ 437 #opcode " (%%r15,%%r14),%%" #reg "\n" \ 438 BUNDLEUNLOCK 439 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \ 440 BUNDLELOCK \ 441 "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ 442 #opcode " %%" #reg ",(%%r15,%%r14)\n" \ 443 BUNDLEUNLOCK 444 #define MEMOPARG(opcode, offset, base, index, scale, arg) \ 445 BUNDLELOCK \ 446 "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ 447 #opcode " (%%r15,%%r14),%" #arg "\n" \ 448 BUNDLEUNLOCK 449 #else // defined(__native_client__) && defined(__x86_64__) 450 #define BUNDLEALIGN "\n" 451 #define MEMACCESS(base) "(%" #base ")" 452 #define MEMACCESS2(offset, base) #offset "(%" #base ")" 453 #define MEMLEA(offset, base) #offset "(%" #base ")" 454 #define MEMLEA3(offset, index, scale) \ 455 #offset "(,%" #index "," #scale ")" 456 #define MEMLEA4(offset, base, index, scale) \ 
457 #offset "(%" #base ",%" #index "," #scale ")" 458 #define MEMMOVESTRING(s, d) 459 #define MEMSTORESTRING(reg, d) 460 #define MEMOPREG(opcode, offset, base, index, scale, reg) \ 461 #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" 462 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \ 463 #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" 464 #define MEMOPARG(opcode, offset, base, index, scale, arg) \ 465 #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n" 466 #endif // defined(__native_client__) && defined(__x86_64__) 467 468 #if defined(__arm__) 469 #undef MEMACCESS 470 #if defined(__native_client__) 471 #define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n" 472 #else 473 #define MEMACCESS(base) "\n" 474 #endif 475 #endif 476 477 void I444ToARGBRow_NEON(const uint8* src_y, 478 const uint8* src_u, 479 const uint8* src_v, 480 uint8* dst_argb, 481 int width); 482 void I422ToARGBRow_NEON(const uint8* src_y, 483 const uint8* src_u, 484 const uint8* src_v, 485 uint8* dst_argb, 486 int width); 487 void I411ToARGBRow_NEON(const uint8* src_y, 488 const uint8* src_u, 489 const uint8* src_v, 490 uint8* dst_argb, 491 int width); 492 void I422ToBGRARow_NEON(const uint8* src_y, 493 const uint8* src_u, 494 const uint8* src_v, 495 uint8* dst_bgra, 496 int width); 497 void I422ToABGRRow_NEON(const uint8* src_y, 498 const uint8* src_u, 499 const uint8* src_v, 500 uint8* dst_abgr, 501 int width); 502 void I422ToRGBARow_NEON(const uint8* src_y, 503 const uint8* src_u, 504 const uint8* src_v, 505 uint8* dst_rgba, 506 int width); 507 void I422ToRGB24Row_NEON(const uint8* src_y, 508 const uint8* src_u, 509 const uint8* src_v, 510 uint8* dst_rgb24, 511 int width); 512 void I422ToRAWRow_NEON(const uint8* src_y, 513 const uint8* src_u, 514 const uint8* src_v, 515 uint8* dst_raw, 516 int width); 517 void I422ToRGB565Row_NEON(const uint8* src_y, 518 const uint8* src_u, 519 const uint8* src_v, 520 uint8* 
dst_rgb565, 521 int width); 522 void I422ToARGB1555Row_NEON(const uint8* src_y, 523 const uint8* src_u, 524 const uint8* src_v, 525 uint8* dst_argb1555, 526 int width); 527 void I422ToARGB4444Row_NEON(const uint8* src_y, 528 const uint8* src_u, 529 const uint8* src_v, 530 uint8* dst_argb4444, 531 int width); 532 void NV12ToARGBRow_NEON(const uint8* src_y, 533 const uint8* src_uv, 534 uint8* dst_argb, 535 int width); 536 void NV21ToARGBRow_NEON(const uint8* src_y, 537 const uint8* src_vu, 538 uint8* dst_argb, 539 int width); 540 void NV12ToRGB565Row_NEON(const uint8* src_y, 541 const uint8* src_uv, 542 uint8* dst_rgb565, 543 int width); 544 void NV21ToRGB565Row_NEON(const uint8* src_y, 545 const uint8* src_vu, 546 uint8* dst_rgb565, 547 int width); 548 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, 549 uint8* dst_argb, 550 int width); 551 void UYVYToARGBRow_NEON(const uint8* src_uyvy, 552 uint8* dst_argb, 553 int width); 554 555 void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); 556 void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix); 557 void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); 558 void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); 559 void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix); 560 void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); 561 void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); 562 void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); 563 void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); 564 void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); 565 void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); 566 void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); 567 void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); 568 void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); 
569 void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); 570 void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); 571 void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); 572 void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); 573 void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); 574 void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); 575 void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 576 int pix); 577 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 578 int pix); 579 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 580 int pix); 581 void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, 582 uint8* dst_u, uint8* dst_v, int pix); 583 void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, 584 uint8* dst_u, uint8* dst_v, int pix); 585 void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, 586 uint8* dst_u, uint8* dst_v, int pix); 587 void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, 588 uint8* dst_u, uint8* dst_v, int pix); 589 void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, 590 uint8* dst_u, uint8* dst_v, int pix); 591 void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, 592 uint8* dst_u, uint8* dst_v, int pix); 593 void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, 594 uint8* dst_u, uint8* dst_v, int pix); 595 void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, 596 uint8* dst_u, uint8* dst_v, int pix); 597 void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, 598 uint8* dst_u, uint8* dst_v, int pix); 599 void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, 600 uint8* dst_u, uint8* dst_v, int pix); 601 void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix); 602 void 
ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix); 603 void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix); 604 void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); 605 void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix); 606 void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); 607 void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); 608 void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); 609 void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); 610 void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix); 611 void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix); 612 void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix); 613 void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix); 614 void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix); 615 void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix); 616 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix); 617 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix); 618 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix); 619 void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); 620 void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); 621 void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); 622 void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); 623 void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); 624 void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); 625 void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); 626 void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); 627 void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); 628 void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix); 629 
void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix); 630 void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix); 631 void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); 632 void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix); 633 void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); 634 void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); 635 void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); 636 637 void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb, 638 uint8* dst_u, uint8* dst_v, int width); 639 void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, 640 uint8* dst_u, uint8* dst_v, int width); 641 void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, 642 uint8* dst_u, uint8* dst_v, int width); 643 void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb, 644 uint8* dst_u, uint8* dst_v, int width); 645 void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra, 646 uint8* dst_u, uint8* dst_v, int width); 647 void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr, 648 uint8* dst_u, uint8* dst_v, int width); 649 void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba, 650 uint8* dst_u, uint8* dst_v, int width); 651 void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb, 652 uint8* dst_u, uint8* dst_v, int width); 653 void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb, 654 uint8* dst_u, uint8* dst_v, int width); 655 void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra, 656 uint8* dst_u, uint8* dst_v, int width); 657 void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr, 658 uint8* dst_u, uint8* dst_v, int width); 659 void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba, 660 uint8* dst_u, uint8* dst_v, int width); 661 void 
ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, 662 uint8* dst_u, uint8* dst_v, int width); 663 void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, 664 uint8* dst_u, uint8* dst_v, int width); 665 void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra, 666 uint8* dst_u, uint8* dst_v, int width); 667 void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr, 668 uint8* dst_u, uint8* dst_v, int width); 669 void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, 670 uint8* dst_u, uint8* dst_v, int width); 671 void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 672 int pix); 673 void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 674 int pix); 675 void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 676 int pix); 677 void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb, 678 uint8* dst_u, uint8* dst_v, int pix); 679 void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb, 680 uint8* dst_u, uint8* dst_v, int pix); 681 void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra, 682 uint8* dst_u, uint8* dst_v, int pix); 683 void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr, 684 uint8* dst_u, uint8* dst_v, int pix); 685 void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba, 686 uint8* dst_u, uint8* dst_v, int pix); 687 void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24, 688 uint8* dst_u, uint8* dst_v, int pix); 689 void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw, 690 uint8* dst_u, uint8* dst_v, int pix); 691 void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565, 692 uint8* dst_u, uint8* dst_v, int pix); 693 void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555, 694 int src_stride_argb1555, 695 uint8* dst_u, uint8* dst_v, int pix); 696 void ARGB4444ToUVRow_Any_NEON(const uint8* 
src_argb4444, 697 int src_stride_argb4444, 698 uint8* dst_u, uint8* dst_v, int pix); 699 void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, 700 uint8* dst_u, uint8* dst_v, int width); 701 void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb, 702 uint8* dst_u, uint8* dst_v, int width); 703 void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra, 704 uint8* dst_u, uint8* dst_v, int width); 705 void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr, 706 uint8* dst_u, uint8* dst_v, int width); 707 void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba, 708 uint8* dst_u, uint8* dst_v, int width); 709 void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24, 710 uint8* dst_u, uint8* dst_v, int width); 711 void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw, 712 uint8* dst_u, uint8* dst_v, int width); 713 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, 714 uint8* dst_u, uint8* dst_v, int width); 715 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, 716 uint8* dst_u, uint8* dst_v, int width); 717 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, 718 uint8* dst_u, uint8* dst_v, int width); 719 720 void ARGBToUV444Row_SSSE3(const uint8* src_argb, 721 uint8* dst_u, uint8* dst_v, int width); 722 void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, 723 uint8* dst_u, uint8* dst_v, int width); 724 void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, 725 uint8* dst_u, uint8* dst_v, int width); 726 727 void ARGBToUV422Row_SSSE3(const uint8* src_argb, 728 uint8* dst_u, uint8* dst_v, int width); 729 void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb, 730 uint8* dst_u, uint8* dst_v, int width); 731 void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb, 732 uint8* dst_u, uint8* dst_v, int width); 733 734 void ARGBToUV444Row_C(const uint8* src_argb, 735 uint8* dst_u, uint8* dst_v, int width); 736 void ARGBToUV422Row_C(const uint8* src_argb, 737 
uint8* dst_u, uint8* dst_v, int width); 738 void ARGBToUV411Row_C(const uint8* src_argb, 739 uint8* dst_u, uint8* dst_v, int width); 740 741 void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); 742 void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); 743 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); 744 void MirrorRow_NEON(const uint8* src, uint8* dst, int width); 745 void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width); 746 void MirrorRow_C(const uint8* src, uint8* dst, int width); 747 748 void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 749 int width); 750 void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 751 int width); 752 void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 753 int width); 754 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 755 int width); 756 757 void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); 758 void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width); 759 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); 760 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); 761 762 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); 763 void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); 764 void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); 765 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); 766 void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 767 int pix); 768 void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 769 int pix); 770 void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, 771 uint8* dst_v, int pix); 772 void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 773 int pix); 774 void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 775 
int pix); 776 void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 777 int pix); 778 void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 779 int pix); 780 781 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 782 int width); 783 void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 784 int width); 785 void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 786 int width); 787 void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 788 int width); 789 void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, 790 uint8* dst_uv, int width); 791 void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 792 int width); 793 void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 794 int width); 795 void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 796 int width); 797 798 void CopyRow_SSE2(const uint8* src, uint8* dst, int count); 799 void CopyRow_ERMS(const uint8* src, uint8* dst, int count); 800 void CopyRow_X86(const uint8* src, uint8* dst, int count); 801 void CopyRow_NEON(const uint8* src, uint8* dst, int count); 802 void CopyRow_MIPS(const uint8* src, uint8* dst, int count); 803 void CopyRow_C(const uint8* src, uint8* dst, int count); 804 805 void CopyRow_16_C(const uint16* src, uint16* dst, int count); 806 807 void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width); 808 void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); 809 void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); 810 811 void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width); 812 void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); 813 void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); 814 815 void SetRow_X86(uint8* dst, uint32 v32, int 
count); 816 void ARGBSetRows_X86(uint8* dst, uint32 v32, int width, 817 int dst_stride, int height); 818 void SetRow_NEON(uint8* dst, uint32 v32, int count); 819 void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width, 820 int dst_stride, int height); 821 void SetRow_C(uint8* dst, uint32 v32, int count); 822 void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride, 823 int height); 824 825 // ARGBShufflers for BGRAToARGB etc. 826 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, 827 const uint8* shuffler, int pix); 828 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, 829 const uint8* shuffler, int pix); 830 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, 831 const uint8* shuffler, int pix); 832 void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, 833 const uint8* shuffler, int pix); 834 void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, 835 const uint8* shuffler, int pix); 836 void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb, 837 const uint8* shuffler, int pix); 838 void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, 839 const uint8* shuffler, int pix); 840 void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, 841 const uint8* shuffler, int pix); 842 void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, 843 const uint8* shuffler, int pix); 844 void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, 845 const uint8* shuffler, int pix); 846 847 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); 848 void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); 849 void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix); 850 void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, 851 int pix); 852 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, 853 int pix); 854 855 void RGB24ToARGBRow_NEON(const uint8* 
src_rgb24, uint8* dst_argb, int pix); 856 void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix); 857 void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix); 858 void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, 859 int pix); 860 void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, 861 int pix); 862 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix); 863 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix); 864 void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix); 865 void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); 866 void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); 867 void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); 868 void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); 869 void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb, 870 int pix); 871 void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb, 872 int pix); 873 void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb, 874 int pix); 875 void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); 876 void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix); 877 void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb, 878 int pix); 879 void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb, 880 int pix); 881 void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb, 882 int pix); 883 884 void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); 885 void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); 886 void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); 887 void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); 888 void 
ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); 889 890 void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 891 void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 892 void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 893 void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 894 void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 895 896 void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix); 897 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); 898 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix); 899 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); 900 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); 901 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); 902 903 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix); 904 void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix); 905 void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix); 906 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix); 907 void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix); 908 void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix); 909 910 void I444ToARGBRow_C(const uint8* src_y, 911 const uint8* src_u, 912 const uint8* src_v, 913 uint8* dst_argb, 914 int width); 915 void I422ToARGBRow_C(const uint8* src_y, 916 const uint8* src_u, 917 const uint8* src_v, 918 uint8* dst_argb, 919 int width); 920 void I411ToARGBRow_C(const uint8* src_y, 921 const uint8* src_u, 922 const uint8* src_v, 923 uint8* dst_argb, 924 int width); 925 void NV12ToARGBRow_C(const uint8* src_y, 926 const uint8* src_uv, 927 uint8* dst_argb, 928 int width); 929 void NV21ToRGB565Row_C(const uint8* src_y, 930 const uint8* src_vu, 931 uint8* dst_argb, 932 int 
width); 933 void NV12ToRGB565Row_C(const uint8* src_y, 934 const uint8* src_uv, 935 uint8* dst_argb, 936 int width); 937 void NV21ToARGBRow_C(const uint8* src_y, 938 const uint8* src_vu, 939 uint8* dst_argb, 940 int width); 941 void YUY2ToARGBRow_C(const uint8* src_yuy2, 942 uint8* dst_argb, 943 int width); 944 void UYVYToARGBRow_C(const uint8* src_uyvy, 945 uint8* dst_argb, 946 int width); 947 void I422ToBGRARow_C(const uint8* src_y, 948 const uint8* src_u, 949 const uint8* src_v, 950 uint8* dst_bgra, 951 int width); 952 void I422ToABGRRow_C(const uint8* src_y, 953 const uint8* src_u, 954 const uint8* src_v, 955 uint8* dst_abgr, 956 int width); 957 void I422ToRGBARow_C(const uint8* src_y, 958 const uint8* src_u, 959 const uint8* src_v, 960 uint8* dst_rgba, 961 int width); 962 void I422ToRGB24Row_C(const uint8* src_y, 963 const uint8* src_u, 964 const uint8* src_v, 965 uint8* dst_rgb24, 966 int width); 967 void I422ToRAWRow_C(const uint8* src_y, 968 const uint8* src_u, 969 const uint8* src_v, 970 uint8* dst_raw, 971 int width); 972 void I422ToARGB4444Row_C(const uint8* src_y, 973 const uint8* src_u, 974 const uint8* src_v, 975 uint8* dst_argb4444, 976 int width); 977 void I422ToARGB1555Row_C(const uint8* src_y, 978 const uint8* src_u, 979 const uint8* src_v, 980 uint8* dst_argb4444, 981 int width); 982 void I422ToRGB565Row_C(const uint8* src_y, 983 const uint8* src_u, 984 const uint8* src_v, 985 uint8* dst_rgb565, 986 int width); 987 void YToARGBRow_C(const uint8* src_y, 988 uint8* dst_argb, 989 int width); 990 void I422ToARGBRow_AVX2(const uint8* src_y, 991 const uint8* src_u, 992 const uint8* src_v, 993 uint8* dst_argb, 994 int width); 995 void I444ToARGBRow_SSSE3(const uint8* src_y, 996 const uint8* src_u, 997 const uint8* src_v, 998 uint8* dst_argb, 999 int width); 1000 void I422ToARGBRow_SSSE3(const uint8* src_y, 1001 const uint8* src_u, 1002 const uint8* src_v, 1003 uint8* dst_argb, 1004 int width); 1005 void I411ToARGBRow_SSSE3(const uint8* src_y, 1006 const 
uint8* src_u, 1007 const uint8* src_v, 1008 uint8* dst_argb, 1009 int width); 1010 void NV12ToARGBRow_SSSE3(const uint8* src_y, 1011 const uint8* src_uv, 1012 uint8* dst_argb, 1013 int width); 1014 void NV21ToARGBRow_SSSE3(const uint8* src_y, 1015 const uint8* src_vu, 1016 uint8* dst_argb, 1017 int width); 1018 void NV12ToRGB565Row_SSSE3(const uint8* src_y, 1019 const uint8* src_uv, 1020 uint8* dst_argb, 1021 int width); 1022 void NV21ToRGB565Row_SSSE3(const uint8* src_y, 1023 const uint8* src_vu, 1024 uint8* dst_argb, 1025 int width); 1026 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, 1027 uint8* dst_argb, 1028 int width); 1029 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, 1030 uint8* dst_argb, 1031 int width); 1032 void I422ToBGRARow_SSSE3(const uint8* src_y, 1033 const uint8* src_u, 1034 const uint8* src_v, 1035 uint8* dst_bgra, 1036 int width); 1037 void I422ToABGRRow_SSSE3(const uint8* src_y, 1038 const uint8* src_u, 1039 const uint8* src_v, 1040 uint8* dst_abgr, 1041 int width); 1042 void I422ToRGBARow_SSSE3(const uint8* src_y, 1043 const uint8* src_u, 1044 const uint8* src_v, 1045 uint8* dst_rgba, 1046 int width); 1047 void I422ToARGB4444Row_SSSE3(const uint8* src_y, 1048 const uint8* src_u, 1049 const uint8* src_v, 1050 uint8* dst_argb, 1051 int width); 1052 void I422ToARGB1555Row_SSSE3(const uint8* src_y, 1053 const uint8* src_u, 1054 const uint8* src_v, 1055 uint8* dst_argb, 1056 int width); 1057 void I422ToRGB565Row_SSSE3(const uint8* src_y, 1058 const uint8* src_u, 1059 const uint8* src_v, 1060 uint8* dst_argb, 1061 int width); 1062 // RGB24/RAW are unaligned. 
1063 void I422ToRGB24Row_SSSE3(const uint8* src_y, 1064 const uint8* src_u, 1065 const uint8* src_v, 1066 uint8* dst_rgb24, 1067 int width); 1068 void I422ToRAWRow_SSSE3(const uint8* src_y, 1069 const uint8* src_u, 1070 const uint8* src_v, 1071 uint8* dst_raw, 1072 int width); 1073 1074 void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y, 1075 const uint8* src_u, 1076 const uint8* src_v, 1077 uint8* dst_argb, 1078 int width); 1079 void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y, 1080 const uint8* src_u, 1081 const uint8* src_v, 1082 uint8* dst_argb, 1083 int width); 1084 void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y, 1085 const uint8* src_u, 1086 const uint8* src_v, 1087 uint8* dst_argb, 1088 int width); 1089 void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y, 1090 const uint8* src_uv, 1091 uint8* dst_argb, 1092 int width); 1093 void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y, 1094 const uint8* src_vu, 1095 uint8* dst_argb, 1096 int width); 1097 void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2, 1098 uint8* dst_argb, 1099 int width); 1100 void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy, 1101 uint8* dst_argb, 1102 int width); 1103 void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y, 1104 const uint8* src_u, 1105 const uint8* src_v, 1106 uint8* dst_bgra, 1107 int width); 1108 void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y, 1109 const uint8* src_u, 1110 const uint8* src_v, 1111 uint8* dst_abgr, 1112 int width); 1113 void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y, 1114 const uint8* src_u, 1115 const uint8* src_v, 1116 uint8* dst_rgba, 1117 int width); 1118 void I422ToARGBRow_Any_AVX2(const uint8* src_y, 1119 const uint8* src_u, 1120 const uint8* src_v, 1121 uint8* dst_argb, 1122 int width); 1123 void I444ToARGBRow_Any_SSSE3(const uint8* src_y, 1124 const uint8* src_u, 1125 const uint8* src_v, 1126 uint8* dst_argb, 1127 int width); 1128 void I422ToARGBRow_Any_SSSE3(const uint8* src_y, 1129 const uint8* src_u, 1130 
const uint8* src_v, 1131 uint8* dst_argb, 1132 int width); 1133 void I411ToARGBRow_Any_SSSE3(const uint8* src_y, 1134 const uint8* src_u, 1135 const uint8* src_v, 1136 uint8* dst_argb, 1137 int width); 1138 void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, 1139 const uint8* src_uv, 1140 uint8* dst_argb, 1141 int width); 1142 void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, 1143 const uint8* src_vu, 1144 uint8* dst_argb, 1145 int width); 1146 void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, 1147 const uint8* src_uv, 1148 uint8* dst_argb, 1149 int width); 1150 void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y, 1151 const uint8* src_vu, 1152 uint8* dst_argb, 1153 int width); 1154 void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, 1155 uint8* dst_argb, 1156 int width); 1157 void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, 1158 uint8* dst_argb, 1159 int width); 1160 void I422ToBGRARow_Any_SSSE3(const uint8* src_y, 1161 const uint8* src_u, 1162 const uint8* src_v, 1163 uint8* dst_bgra, 1164 int width); 1165 void I422ToABGRRow_Any_SSSE3(const uint8* src_y, 1166 const uint8* src_u, 1167 const uint8* src_v, 1168 uint8* dst_abgr, 1169 int width); 1170 void I422ToRGBARow_Any_SSSE3(const uint8* src_y, 1171 const uint8* src_u, 1172 const uint8* src_v, 1173 uint8* dst_rgba, 1174 int width); 1175 void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, 1176 const uint8* src_u, 1177 const uint8* src_v, 1178 uint8* dst_rgba, 1179 int width); 1180 void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, 1181 const uint8* src_u, 1182 const uint8* src_v, 1183 uint8* dst_rgba, 1184 int width); 1185 void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, 1186 const uint8* src_u, 1187 const uint8* src_v, 1188 uint8* dst_rgba, 1189 int width); 1190 // RGB24/RAW are unaligned. 
1191 void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, 1192 const uint8* src_u, 1193 const uint8* src_v, 1194 uint8* dst_argb, 1195 int width); 1196 void I422ToRAWRow_Any_SSSE3(const uint8* src_y, 1197 const uint8* src_u, 1198 const uint8* src_v, 1199 uint8* dst_argb, 1200 int width); 1201 void YToARGBRow_SSE2(const uint8* src_y, 1202 uint8* dst_argb, 1203 int width); 1204 void YToARGBRow_NEON(const uint8* src_y, 1205 uint8* dst_argb, 1206 int width); 1207 void YToARGBRow_Any_SSE2(const uint8* src_y, 1208 uint8* dst_argb, 1209 int width); 1210 void YToARGBRow_Any_NEON(const uint8* src_y, 1211 uint8* dst_argb, 1212 int width); 1213 1214 // ARGB preattenuated alpha blend. 1215 void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1, 1216 uint8* dst_argb, int width); 1217 void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1, 1218 uint8* dst_argb, int width); 1219 void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1, 1220 uint8* dst_argb, int width); 1221 void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1, 1222 uint8* dst_argb, int width); 1223 1224 // ARGB multiply images. Same API as Blend, but these require 1225 // pointer and width alignment for SSE2. 
1226 void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1, 1227 uint8* dst_argb, int width); 1228 void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1, 1229 uint8* dst_argb, int width); 1230 void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, 1231 uint8* dst_argb, int width); 1232 void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1, 1233 uint8* dst_argb, int width); 1234 void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, 1235 uint8* dst_argb, int width); 1236 void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1, 1237 uint8* dst_argb, int width); 1238 void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, 1239 uint8* dst_argb, int width); 1240 1241 // ARGB add images. 1242 void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1, 1243 uint8* dst_argb, int width); 1244 void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1, 1245 uint8* dst_argb, int width); 1246 void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, 1247 uint8* dst_argb, int width); 1248 void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1, 1249 uint8* dst_argb, int width); 1250 void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, 1251 uint8* dst_argb, int width); 1252 void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1, 1253 uint8* dst_argb, int width); 1254 void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, 1255 uint8* dst_argb, int width); 1256 1257 // ARGB subtract images. Same API as Blend, but these require 1258 // pointer and width alignment for SSE2. 
1259 void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1, 1260 uint8* dst_argb, int width); 1261 void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1, 1262 uint8* dst_argb, int width); 1263 void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, 1264 uint8* dst_argb, int width); 1265 void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1, 1266 uint8* dst_argb, int width); 1267 void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, 1268 uint8* dst_argb, int width); 1269 void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1, 1270 uint8* dst_argb, int width); 1271 void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, 1272 uint8* dst_argb, int width); 1273 1274 void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); 1275 void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); 1276 void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); 1277 void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); 1278 void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); 1279 1280 void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 1281 void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 1282 void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 1283 void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 1284 void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 1285 1286 void I444ToARGBRow_Any_NEON(const uint8* src_y, 1287 const uint8* src_u, 1288 const uint8* src_v, 1289 uint8* dst_argb, 1290 int width); 1291 void I422ToARGBRow_Any_NEON(const uint8* src_y, 1292 const uint8* src_u, 1293 const uint8* src_v, 1294 uint8* dst_argb, 1295 int width); 1296 void I411ToARGBRow_Any_NEON(const uint8* src_y, 1297 const 
uint8* src_u, 1298 const uint8* src_v, 1299 uint8* dst_argb, 1300 int width); 1301 void I422ToBGRARow_Any_NEON(const uint8* src_y, 1302 const uint8* src_u, 1303 const uint8* src_v, 1304 uint8* dst_argb, 1305 int width); 1306 void I422ToABGRRow_Any_NEON(const uint8* src_y, 1307 const uint8* src_u, 1308 const uint8* src_v, 1309 uint8* dst_argb, 1310 int width); 1311 void I422ToRGBARow_Any_NEON(const uint8* src_y, 1312 const uint8* src_u, 1313 const uint8* src_v, 1314 uint8* dst_argb, 1315 int width); 1316 void I422ToRGB24Row_Any_NEON(const uint8* src_y, 1317 const uint8* src_u, 1318 const uint8* src_v, 1319 uint8* dst_argb, 1320 int width); 1321 void I422ToRAWRow_Any_NEON(const uint8* src_y, 1322 const uint8* src_u, 1323 const uint8* src_v, 1324 uint8* dst_argb, 1325 int width); 1326 void I422ToARGB4444Row_Any_NEON(const uint8* src_y, 1327 const uint8* src_u, 1328 const uint8* src_v, 1329 uint8* dst_argb, 1330 int width); 1331 void I422ToARGB1555Row_Any_NEON(const uint8* src_y, 1332 const uint8* src_u, 1333 const uint8* src_v, 1334 uint8* dst_argb, 1335 int width); 1336 void I422ToRGB565Row_Any_NEON(const uint8* src_y, 1337 const uint8* src_u, 1338 const uint8* src_v, 1339 uint8* dst_argb, 1340 int width); 1341 void NV12ToARGBRow_Any_NEON(const uint8* src_y, 1342 const uint8* src_uv, 1343 uint8* dst_argb, 1344 int width); 1345 void NV21ToARGBRow_Any_NEON(const uint8* src_y, 1346 const uint8* src_uv, 1347 uint8* dst_argb, 1348 int width); 1349 void NV12ToRGB565Row_Any_NEON(const uint8* src_y, 1350 const uint8* src_uv, 1351 uint8* dst_argb, 1352 int width); 1353 void NV21ToRGB565Row_Any_NEON(const uint8* src_y, 1354 const uint8* src_uv, 1355 uint8* dst_argb, 1356 int width); 1357 void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, 1358 uint8* dst_argb, 1359 int width); 1360 void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, 1361 uint8* dst_argb, 1362 int width); 1363 void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, 1364 const uint8* src_u, 1365 const uint8* src_v, 1366 
uint8* dst_argb, 1367 int width); 1368 void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, 1369 const uint8* src_u, 1370 const uint8* src_v, 1371 uint8* dst_argb, 1372 int width); 1373 void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, 1374 const uint8* src_u, 1375 const uint8* src_v, 1376 uint8* dst_argb, 1377 int width); 1378 void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, 1379 const uint8* src_u, 1380 const uint8* src_v, 1381 uint8* dst_argb, 1382 int width); 1383 void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, 1384 const uint8* src_u, 1385 const uint8* src_v, 1386 uint8* dst_argb, 1387 int width); 1388 void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, 1389 const uint8* src_u, 1390 const uint8* src_v, 1391 uint8* dst_argb, 1392 int width); 1393 1394 void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); 1395 void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, 1396 uint8* dst_u, uint8* dst_v, int pix); 1397 void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, 1398 uint8* dst_u, uint8* dst_v, int pix); 1399 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); 1400 void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, 1401 uint8* dst_u, uint8* dst_v, int pix); 1402 void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, 1403 uint8* dst_u, uint8* dst_v, int pix); 1404 void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, 1405 uint8* dst_y, int pix); 1406 void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2, 1407 uint8* dst_u, uint8* dst_v, int pix); 1408 void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2, 1409 uint8* dst_u, uint8* dst_v, int pix); 1410 void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix); 1411 void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, 1412 uint8* dst_u, uint8* dst_v, int pix); 1413 void YUY2ToUV422Row_NEON(const uint8* src_yuy2, 1414 uint8* dst_u, uint8* dst_v, int pix); 1415 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix); 1416 void 
YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2, 1417 uint8* dst_u, uint8* dst_v, int pix); 1418 void YUY2ToUV422Row_C(const uint8* src_yuy2, 1419 uint8* dst_u, uint8* dst_v, int pix); 1420 void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); 1421 void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2, 1422 uint8* dst_u, uint8* dst_v, int pix); 1423 void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2, 1424 uint8* dst_u, uint8* dst_v, int pix); 1425 void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); 1426 void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2, 1427 uint8* dst_u, uint8* dst_v, int pix); 1428 void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2, 1429 uint8* dst_u, uint8* dst_v, int pix); 1430 void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix); 1431 void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2, 1432 uint8* dst_u, uint8* dst_v, int pix); 1433 void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2, 1434 uint8* dst_u, uint8* dst_v, int pix); 1435 void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); 1436 void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, 1437 uint8* dst_u, uint8* dst_v, int pix); 1438 void UYVYToUV422Row_AVX2(const uint8* src_uyvy, 1439 uint8* dst_u, uint8* dst_v, int pix); 1440 void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix); 1441 void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, 1442 uint8* dst_u, uint8* dst_v, int pix); 1443 void UYVYToUV422Row_SSE2(const uint8* src_uyvy, 1444 uint8* dst_u, uint8* dst_v, int pix); 1445 void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, 1446 uint8* dst_y, int pix); 1447 void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, 1448 uint8* dst_u, uint8* dst_v, int pix); 1449 void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy, 1450 uint8* dst_u, uint8* dst_v, int pix); 1451 void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, 
int pix); 1452 void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, 1453 uint8* dst_u, uint8* dst_v, int pix); 1454 void UYVYToUV422Row_AVX2(const uint8* src_uyvy, 1455 uint8* dst_u, uint8* dst_v, int pix); 1456 void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix); 1457 void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, 1458 uint8* dst_u, uint8* dst_v, int pix); 1459 void UYVYToUV422Row_NEON(const uint8* src_uyvy, 1460 uint8* dst_u, uint8* dst_v, int pix); 1461 1462 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix); 1463 void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy, 1464 uint8* dst_u, uint8* dst_v, int pix); 1465 void UYVYToUV422Row_C(const uint8* src_uyvy, 1466 uint8* dst_u, uint8* dst_v, int pix); 1467 void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); 1468 void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy, 1469 uint8* dst_u, uint8* dst_v, int pix); 1470 void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy, 1471 uint8* dst_u, uint8* dst_v, int pix); 1472 void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix); 1473 void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy, 1474 uint8* dst_u, uint8* dst_v, int pix); 1475 void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy, 1476 uint8* dst_u, uint8* dst_v, int pix); 1477 void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix); 1478 void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, 1479 uint8* dst_u, uint8* dst_v, int pix); 1480 void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, 1481 uint8* dst_u, uint8* dst_v, int pix); 1482 1483 void HalfRow_C(const uint8* src_uv, int src_uv_stride, 1484 uint8* dst_uv, int pix); 1485 void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, 1486 uint8* dst_uv, int pix); 1487 void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, 1488 uint8* dst_uv, int pix); 1489 void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, 1490 uint8* 
dst_uv, int pix); 1491 1492 void HalfRow_16_C(const uint16* src_uv, int src_uv_stride, 1493 uint16* dst_uv, int pix); 1494 1495 void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer, 1496 uint32 selector, int pix); 1497 void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, 1498 uint32 selector, int pix); 1499 void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer, 1500 uint32 selector, int pix); 1501 void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer, 1502 uint32 selector, int pix); 1503 void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer, 1504 uint32 selector, int pix); 1505 void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer, 1506 uint32 /* selector */, int pix); 1507 void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, 1508 uint32 /* selector */, int pix); 1509 void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, 1510 uint32 /* selector */, int pix); 1511 void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer, 1512 uint32 /* selector */, int pix); 1513 void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer, 1514 uint32 /* selector */, int pix); 1515 1516 void I422ToYUY2Row_C(const uint8* src_y, 1517 const uint8* src_u, 1518 const uint8* src_v, 1519 uint8* dst_yuy2, int width); 1520 void I422ToUYVYRow_C(const uint8* src_y, 1521 const uint8* src_u, 1522 const uint8* src_v, 1523 uint8* dst_uyvy, int width); 1524 void I422ToYUY2Row_SSE2(const uint8* src_y, 1525 const uint8* src_u, 1526 const uint8* src_v, 1527 uint8* dst_yuy2, int width); 1528 void I422ToUYVYRow_SSE2(const uint8* src_y, 1529 const uint8* src_u, 1530 const uint8* src_v, 1531 uint8* dst_uyvy, int width); 1532 void I422ToYUY2Row_Any_SSE2(const uint8* src_y, 1533 const uint8* src_u, 1534 const uint8* src_v, 1535 uint8* dst_yuy2, int width); 1536 void I422ToUYVYRow_Any_SSE2(const uint8* src_y, 1537 const uint8* src_u, 1538 const uint8* src_v, 1539 uint8* dst_uyvy, 
int width); 1540 void I422ToYUY2Row_NEON(const uint8* src_y, 1541 const uint8* src_u, 1542 const uint8* src_v, 1543 uint8* dst_yuy2, int width); 1544 void I422ToUYVYRow_NEON(const uint8* src_y, 1545 const uint8* src_u, 1546 const uint8* src_v, 1547 uint8* dst_uyvy, int width); 1548 void I422ToYUY2Row_Any_NEON(const uint8* src_y, 1549 const uint8* src_u, 1550 const uint8* src_v, 1551 uint8* dst_yuy2, int width); 1552 void I422ToUYVYRow_Any_NEON(const uint8* src_y, 1553 const uint8* src_u, 1554 const uint8* src_v, 1555 uint8* dst_uyvy, int width); 1556 1557 // Effects related row functions. 1558 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); 1559 void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); 1560 void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); 1561 void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); 1562 void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); 1563 void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, 1564 int width); 1565 void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, 1566 int width); 1567 void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, 1568 int width); 1569 void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, 1570 int width); 1571 1572 // Inverse table for unattenuate, shared by C and SSE2. 
1573 extern const uint32 fixed_invtbl8[256]; 1574 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); 1575 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); 1576 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); 1577 void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, 1578 int width); 1579 void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, 1580 int width); 1581 1582 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); 1583 void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); 1584 void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); 1585 1586 void ARGBSepiaRow_C(uint8* dst_argb, int width); 1587 void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width); 1588 void ARGBSepiaRow_NEON(uint8* dst_argb, int width); 1589 1590 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, 1591 const int8* matrix_argb, int width); 1592 void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, 1593 const int8* matrix_argb, int width); 1594 void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, 1595 const int8* matrix_argb, int width); 1596 1597 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); 1598 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); 1599 1600 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); 1601 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); 1602 1603 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, 1604 int interval_offset, int width); 1605 void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, 1606 int interval_offset, int width); 1607 void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, 1608 int interval_offset, int width); 1609 1610 void ARGBShadeRow_C(const uint8* 
src_argb, uint8* dst_argb, int width, 1611 uint32 value); 1612 void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, 1613 uint32 value); 1614 void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, 1615 uint32 value); 1616 1617 // Used for blur. 1618 void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, 1619 int width, int area, uint8* dst, int count); 1620 void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, 1621 const int32* previous_cumsum, int width); 1622 1623 void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft, 1624 int width, int area, uint8* dst, int count); 1625 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, 1626 const int32* previous_cumsum, int width); 1627 1628 LIBYUV_API 1629 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, 1630 uint8* dst_argb, const float* uv_dudv, int width); 1631 LIBYUV_API 1632 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, 1633 uint8* dst_argb, const float* uv_dudv, int width); 1634 1635 // Used for I420Scale, ARGBScale, and ARGBInterpolate. 
1636 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, 1637 ptrdiff_t src_stride_ptr, 1638 int width, int source_y_fraction); 1639 void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, 1640 ptrdiff_t src_stride_ptr, int width, 1641 int source_y_fraction); 1642 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 1643 ptrdiff_t src_stride_ptr, int width, 1644 int source_y_fraction); 1645 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, 1646 ptrdiff_t src_stride_ptr, int width, 1647 int source_y_fraction); 1648 void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr, 1649 ptrdiff_t src_stride_ptr, int width, 1650 int source_y_fraction); 1651 void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, 1652 ptrdiff_t src_stride_ptr, int width, 1653 int source_y_fraction); 1654 void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, 1655 ptrdiff_t src_stride_ptr, int width, 1656 int source_y_fraction); 1657 void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 1658 ptrdiff_t src_stride_ptr, int width, 1659 int source_y_fraction); 1660 void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, 1661 ptrdiff_t src_stride_ptr, int width, 1662 int source_y_fraction); 1663 void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr, 1664 ptrdiff_t src_stride_ptr, int width, 1665 int source_y_fraction); 1666 void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 1667 ptrdiff_t src_stride_ptr, int width, 1668 int source_y_fraction); 1669 void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr, 1670 ptrdiff_t src_stride_ptr, int width, 1671 int source_y_fraction); 1672 void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, 1673 ptrdiff_t src_stride_ptr, int width, 1674 int source_y_fraction); 1675 1676 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, 1677 ptrdiff_t src_stride_ptr, 1678 int width, int 
source_y_fraction); 1679 1680 // Sobel images. 1681 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, 1682 uint8* dst_sobelx, int width); 1683 void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, 1684 const uint8* src_y2, uint8* dst_sobelx, int width); 1685 void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, 1686 const uint8* src_y2, uint8* dst_sobelx, int width); 1687 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, 1688 uint8* dst_sobely, int width); 1689 void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, 1690 uint8* dst_sobely, int width); 1691 void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, 1692 uint8* dst_sobely, int width); 1693 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, 1694 uint8* dst_argb, int width); 1695 void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, 1696 uint8* dst_argb, int width); 1697 void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, 1698 uint8* dst_argb, int width); 1699 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, 1700 uint8* dst_y, int width); 1701 void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, 1702 uint8* dst_y, int width); 1703 void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, 1704 uint8* dst_y, int width); 1705 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, 1706 uint8* dst_argb, int width); 1707 void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, 1708 uint8* dst_argb, int width); 1709 void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, 1710 uint8* dst_argb, int width); 1711 1712 void ARGBPolynomialRow_C(const uint8* src_argb, 1713 uint8* dst_argb, const float* poly, 1714 int width); 1715 void ARGBPolynomialRow_SSE2(const uint8* src_argb, 1716 uint8* dst_argb, const float* poly, 1717 int width); 1718 void ARGBPolynomialRow_AVX2(const uint8* src_argb, 1719 
uint8* dst_argb, const float* poly, 1720 int width); 1721 1722 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, 1723 const uint8* luma, uint32 lumacoeff); 1724 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, 1725 int width, 1726 const uint8* luma, uint32 lumacoeff); 1727 1728 #ifdef __cplusplus 1729 } // extern "C" 1730 } // namespace libyuv 1731 #endif 1732 1733 #endif // INCLUDE_LIBYUV_ROW_H_ NOLINT 1734