@ This file was created from a .asm file
@  using the ads2gas.pl script.
    .syntax unified
@
@  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
@
@  Use of this source code is governed by a BSD-style license
@  that can be found in the LICENSE file in the root of the source
@  tree. An additional intellectual property rights grant can be found
@  in the file PATENTS.  All contributing project authors may
@  be found in the AUTHORS file in the root of the source tree.
@
@ ---------------------------------------------------------------------------
@ Intra predictors (vertical, horizontal, TrueMotion) for 4x4 .. 32x32 blocks.
@
@ Target : 32-bit ARM state with NEON, GNU as unified syntax.
@ All entry points share the C signature
@     void f(uint8_t *dst, ptrdiff_t y_stride,
@            const uint8_t *above, const uint8_t *left);
@ with   r0 = dst, r1 = y_stride, r2 = above, r3 = left.
@
@ Every routine is a leaf that touches only r0-r3, r12, q0-q3 and q8-q15;
@ q4-q7 are never written and the stack is never used.
@ ---------------------------------------------------------------------------

    .global vpx_v_predictor_4x4_neon
    .type vpx_v_predictor_4x4_neon, function
    .global vpx_v_predictor_8x8_neon
    .type vpx_v_predictor_8x8_neon, function
    .global vpx_v_predictor_16x16_neon
    .type vpx_v_predictor_16x16_neon, function
    .global vpx_v_predictor_32x32_neon
    .type vpx_v_predictor_32x32_neon, function
    .global vpx_h_predictor_4x4_neon
    .type vpx_h_predictor_4x4_neon, function
    .global vpx_h_predictor_8x8_neon
    .type vpx_h_predictor_8x8_neon, function
    .global vpx_h_predictor_16x16_neon
    .type vpx_h_predictor_16x16_neon, function
    .global vpx_h_predictor_32x32_neon
    .type vpx_h_predictor_32x32_neon, function
    .global vpx_tm_predictor_4x4_neon
    .type vpx_tm_predictor_4x4_neon, function
    .global vpx_tm_predictor_8x8_neon
    .type vpx_tm_predictor_8x8_neon, function
    .global vpx_tm_predictor_16x16_neon
    .type vpx_tm_predictor_16x16_neon, function
    .global vpx_tm_predictor_32x32_neon
    .type vpx_tm_predictor_32x32_neon, function
    .arm
    .eabi_attribute 24, 1 @Tag_ABI_align_needed
    .eabi_attribute 25, 1 @Tag_ABI_align_preserved

.text
.p2align 2

@ ===========================================================================
@ Vertical predictors: every output row is a copy of the `above` row.
@ `left` (r3) is not read.
@ ===========================================================================

@void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
@                              const uint8_t *above,
@                              const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, d0

_vpx_v_predictor_4x4_neon:
vpx_v_predictor_4x4_neon:
    vld1.32     {d0[0]}, [r2]           @ d0[31:0] = above[0..3]
    .rept       4                       @ one 4-byte store per row
    vst1.32     {d0[0]}, [r0], r1
    .endr
    bx          lr
    .size vpx_v_predictor_4x4_neon, .-vpx_v_predictor_4x4_neon

@void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
@                              const uint8_t *above,
@                              const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, d0

_vpx_v_predictor_8x8_neon:
vpx_v_predictor_8x8_neon:
    vld1.8      {d0}, [r2]              @ d0 = above[0..7]
    .rept       8                       @ one 8-byte store per row
    vst1.8      {d0}, [r0], r1
    .endr
    bx          lr
    .size vpx_v_predictor_8x8_neon, .-vpx_v_predictor_8x8_neon

@void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, q0

_vpx_v_predictor_16x16_neon:
vpx_v_predictor_16x16_neon:
    vld1.8      {q0}, [r2]              @ q0 = above[0..15]
    .rept       16                      @ one 16-byte store per row
    vst1.8      {q0}, [r0], r1
    .endr
    bx          lr
    .size vpx_v_predictor_16x16_neon, .-vpx_v_predictor_16x16_neon

@void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, r2, q0, q1, flags

_vpx_v_predictor_32x32_neon:
vpx_v_predictor_32x32_neon:
    vld1.8      {q0, q1}, [r2]          @ q0:q1 = above[0..31]
    mov         r2, #2                  @ r2 is free now: 2 passes x 16 rows
loop_v:
    .rept       16                      @ one 32-byte store per row
    vst1.8      {q0, q1}, [r0], r1
    .endr
    subs        r2, r2, #1
    bgt         loop_v
    bx          lr
    .size vpx_v_predictor_32x32_neon, .-vpx_v_predictor_32x32_neon

@ ===========================================================================
@ Horizontal predictors: output row i is left[i] replicated across the row.
@ `above` (r2) is not read.
@ ===========================================================================

@void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
@                              const uint8_t *above,
@                              const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, d0, d1

_vpx_h_predictor_4x4_neon:
vpx_h_predictor_4x4_neon:
    vld1.32     {d1[0]}, [r3]           @ d1 bytes 0..3 = left[0..3]
    .irp        lane, 0, 1, 2, 3
    vdup.8      d0, d1[\lane]           @ broadcast left[lane]
    vst1.32     {d0[0]}, [r0], r1       @ write 4 pixels of that row
    .endr
    bx          lr
    .size vpx_h_predictor_4x4_neon, .-vpx_h_predictor_4x4_neon

@void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
@                              const uint8_t *above,
@                              const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, d0, d1

_vpx_h_predictor_8x8_neon:
vpx_h_predictor_8x8_neon:
    vld1.64     {d1}, [r3]              @ d1 = left[0..7]
    .irp        lane, 0, 1, 2, 3, 4, 5, 6, 7
    vdup.8      d0, d1[\lane]           @ broadcast left[lane]
    vst1.64     {d0}, [r0], r1          @ write 8 pixels of that row
    .endr
    bx          lr
    .size vpx_h_predictor_8x8_neon, .-vpx_h_predictor_8x8_neon

@void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, q0, q1

_vpx_h_predictor_16x16_neon:
vpx_h_predictor_16x16_neon:
    vld1.8      {q1}, [r3]              @ d2 = left[0..7], d3 = left[8..15]
    @ Rows 0..7 come from d2.
    .irp        lane, 0, 1, 2, 3, 4, 5, 6, 7
    vdup.8      q0, d2[\lane]
    vst1.8      {q0}, [r0], r1
    .endr
    @ Rows 8..15 come from d3.
    .irp        lane, 0, 1, 2, 3, 4, 5, 6, 7
    vdup.8      q0, d3[\lane]
    vst1.8      {q0}, [r0], r1
    .endr
    bx          lr
    .size vpx_h_predictor_16x16_neon, .-vpx_h_predictor_16x16_neon

@void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, r1, r2, r3, q0, q1, flags

_vpx_h_predictor_32x32_neon:
vpx_h_predictor_32x32_neon:
    @ Each row is written as two 16-byte halves. The first store advances
    @ dst by 16 via writeback, so the second must advance by stride - 16.
    sub         r1, r1, #16
    mov         r2, #2                  @ 2 passes x 16 rows
loop_h:
    vld1.8      {q1}, [r3]!             @ next 16 left pixels: d2, d3
    .irp        lane, 0, 1, 2, 3, 4, 5, 6, 7
    vdup.8      q0, d2[\lane]
    vst1.8      {q0}, [r0]!             @ columns  0..15
    vst1.8      {q0}, [r0], r1          @ columns 16..31, then next row
    .endr
    .irp        lane, 0, 1, 2, 3, 4, 5, 6, 7
    vdup.8      q0, d3[\lane]
    vst1.8      {q0}, [r0]!
    vst1.8      {q0}, [r0], r1
    .endr
    subs        r2, r2, #1
    bgt         loop_h
    bx          lr
    .size vpx_h_predictor_32x32_neon, .-vpx_h_predictor_32x32_neon

@ ===========================================================================
@ TrueMotion predictors:
@     dst[r][c] = saturate_u8(left[r] + above[c] - above[-1])
@ The difference (above[c] - above[-1]) is formed once as 16-bit lanes
@ (vsubl.u8), each row adds its widened left pixel (vadd.s16), and the sum
@ is narrowed with signed-to-unsigned saturation (vqmovun.s16).
@ ===========================================================================

@void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, r3, r12, q0-q3

_vpx_tm_predictor_4x4_neon:
vpx_tm_predictor_4x4_neon:
    @ Load ytop_left = above[-1], replicated into every byte of d0.
    sub         r12, r2, #1
    vld1.u8     {d0[]}, [r12]

    @ Load above 4 pixels (only the low 4 lanes are meaningful).
    vld1.32     {d2[0]}, [r2]

    @ q3 = above - ytop_left, widened to 16 bits.
    vsubl.u8    q3, d2, d0

    @ Rows 0 and 1: load one left pixel per row, replicated across lanes.
    vld1.u8     {d2[]}, [r3]!
    vld1.u8     {d4[]}, [r3]!
    vmovl.u8    q1, d2
    vmovl.u8    q2, d4
    vadd.s16    q1, q1, q3
    vadd.s16    q2, q2, q3
    vqmovun.s16 d0, q1
    vqmovun.s16 d1, q2
    vst1.32     {d0[0]}, [r0], r1
    vst1.32     {d1[0]}, [r0], r1

    @ Rows 2 and 3. No writeback on the last load: left[3] is the final
    @ byte consumed.
    vld1.u8     {d2[]}, [r3]!
    vld1.u8     {d4[]}, [r3]
    vmovl.u8    q1, d2
    vmovl.u8    q2, d4
    vadd.s16    q1, q1, q3
    vadd.s16    q2, q2, q3
    vqmovun.s16 d0, q1
    vqmovun.s16 d1, q2
    vst1.32     {d0[0]}, [r0], r1
    vst1.32     {d1[0]}, [r0], r1
    bx          lr
    .size vpx_tm_predictor_4x4_neon, .-vpx_tm_predictor_4x4_neon

@void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, r12, q0, q1, q3, q8, q9, q10, d30

_vpx_tm_predictor_8x8_neon:
vpx_tm_predictor_8x8_neon:
    @ Load ytop_left = above[-1], replicated into every byte of d0.
    sub         r12, r2, #1
    vld1.8      {d0[]}, [r12]

    @ Load all 8 left pixels.
    vld1.8      {d30}, [r3]

    @ Load above 8 pixels.
    vld1.64     {d2}, [r2]

    @ q10 = left widened to 16 bits: d20 = left[0..3], d21 = left[4..7].
    vmovl.u8    q10, d30

    @ q3 = above - ytop_left, widened to 16 bits.
    vsubl.u8    q3, d2, d0

    @ Rows 0 and 1.
    vdup.16     q0, d20[0]
    vdup.16     q1, d20[1]
    vadd.s16    q0, q3, q0
    vadd.s16    q1, q3, q1

    @ Rows 2 and 3.
    vdup.16     q8, d20[2]
    vdup.16     q9, d20[3]
    vadd.s16    q8, q3, q8
    vadd.s16    q9, q3, q9

    vqmovun.s16 d0, q0
    vqmovun.s16 d1, q1
    vqmovun.s16 d2, q8
    vqmovun.s16 d3, q9

    vst1.64     {d0}, [r0], r1
    vst1.64     {d1}, [r0], r1
    vst1.64     {d2}, [r0], r1
    vst1.64     {d3}, [r0], r1

    @ Rows 4 and 5.
    vdup.16     q0, d21[0]
    vdup.16     q1, d21[1]
    vadd.s16    q0, q3, q0
    vadd.s16    q1, q3, q1

    @ Rows 6 and 7.
    vdup.16     q8, d21[2]
    vdup.16     q9, d21[3]
    vadd.s16    q8, q3, q8
    vadd.s16    q9, q3, q9

    vqmovun.s16 d0, q0
    vqmovun.s16 d1, q1
    vqmovun.s16 d2, q8
    vqmovun.s16 d3, q9

    vst1.64     {d0}, [r0], r1
    vst1.64     {d1}, [r0], r1
    vst1.64     {d2}, [r0], r1
    vst1.64     {d3}, [r0], r1

    bx          lr
    .size vpx_tm_predictor_8x8_neon, .-vpx_tm_predictor_8x8_neon

@void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
@                                  const uint8_t *above,
@                                  const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, r2, r3, r12, q0-q3, q8, d18, q10, q11, flags
@
@ The 8 left pixels for a pass are loaded at the top of that pass, so
@ exactly left[0..15] is read. (Preloading the next 8 at the bottom of the
@ loop would read left[16..23] on the final pass.)

_vpx_tm_predictor_16x16_neon:
vpx_tm_predictor_16x16_neon:
    @ Load ytop_left = above[-1], replicated into every byte of d0.
    sub         r12, r2, #1
    vld1.8      {d0[]}, [r12]

    @ Load above 16 pixels.
    vld1.8      {q1}, [r2]

    @ q2 = above[0..7] - ytop_left, q3 = above[8..15] - ytop_left (16-bit).
    vsubl.u8    q2, d2, d0
    vsubl.u8    q3, d3, d0

    @ Process 8 rows per pass, 2 passes for 16 rows. r2 is free now.
    mov         r2, #2

loop_16x16_neon:
    @ Load the 8 left pixels of this pass into d18 and widen them:
    @ d20 = rows 0..3 of the pass, d21 = rows 4..7.
    vld1.8      {d18}, [r3]!
    vmovl.u8    q10, d18

    @ Rows 0 and 1 of this pass.
    vdup.16     q0, d20[0]
    vdup.16     q8, d20[1]
    vadd.s16    q1, q0, q2
    vadd.s16    q0, q0, q3
    vadd.s16    q11, q8, q2
    vadd.s16    q8, q8, q3
    vqmovun.s16 d2, q1
    vqmovun.s16 d3, q0
    vqmovun.s16 d22, q11
    vqmovun.s16 d23, q8
    vdup.16     q0, d20[2]              @ set up the next 2 rows early
    vdup.16     q8, d20[3]
    vst1.64     {d2,d3}, [r0], r1
    vst1.64     {d22,d23}, [r0], r1

    @ Rows 2 and 3 of this pass.
    vadd.s16    q1, q0, q2
    vadd.s16    q0, q0, q3
    vadd.s16    q11, q8, q2
    vadd.s16    q8, q8, q3
    vqmovun.s16 d2, q1
    vqmovun.s16 d3, q0
    vqmovun.s16 d22, q11
    vqmovun.s16 d23, q8
    vdup.16     q0, d21[0]              @ set up the next 2 rows early
    vdup.16     q8, d21[1]
    vst1.64     {d2,d3}, [r0], r1
    vst1.64     {d22,d23}, [r0], r1

    @ Rows 4 and 5 of this pass.
    vadd.s16    q1, q0, q2
    vadd.s16    q0, q0, q3
    vadd.s16    q11, q8, q2
    vadd.s16    q8, q8, q3
    vqmovun.s16 d2, q1
    vqmovun.s16 d3, q0
    vqmovun.s16 d22, q11
    vqmovun.s16 d23, q8
    vdup.16     q0, d21[2]              @ set up the next 2 rows early
    vdup.16     q8, d21[3]
    vst1.64     {d2,d3}, [r0], r1
    vst1.64     {d22,d23}, [r0], r1

    @ Rows 6 and 7 of this pass.
    vadd.s16    q1, q0, q2
    vadd.s16    q0, q0, q3
    vadd.s16    q11, q8, q2
    vadd.s16    q8, q8, q3
    vqmovun.s16 d2, q1
    vqmovun.s16 d3, q0
    vqmovun.s16 d22, q11
    vqmovun.s16 d23, q8
    vst1.64     {d2,d3}, [r0], r1
    vst1.64     {d22,d23}, [r0], r1

    subs        r2, r2, #1
    bgt         loop_16x16_neon

    bx          lr
    .size vpx_tm_predictor_16x16_neon, .-vpx_tm_predictor_16x16_neon

@void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
@                                  const uint8_t *above,
@                                  const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@ Clobbers: r0, r2, r3, r12, q0-q3, q8-q15, flags
@
@ The 8 left pixels for a pass are loaded at the top of that pass, so
@ exactly left[0..31] is read. (Preloading the next 8 at the bottom of the
@ loop would read left[32..39] on the final pass.)

_vpx_tm_predictor_32x32_neon:
vpx_tm_predictor_32x32_neon:
    @ Load ytop_left = above[-1], replicated into every byte of d0.
    sub         r12, r2, #1
    vld1.8      {d0[]}, [r12]

    @ Load above 32 pixels.
    vld1.8      {q1}, [r2]!
    vld1.8      {q2}, [r2]

    @ q8..q11 = above[0..31] - ytop_left as four vectors of 8 x 16-bit.
    vsubl.u8    q8, d2, d0
    vsubl.u8    q9, d3, d0
    vsubl.u8    q10, d4, d0
    vsubl.u8    q11, d5, d0

    @ Process 8 rows per pass, 4 passes for 32 rows. r2 is free now.
    mov         r2, #4

loop_32x32_neon:
    @ Load the 8 left pixels of this pass and widen them into q3:
    @ d6 = rows 0..3 of the pass, d7 = rows 4..7. d0 is scratch here;
    @ ytop_left is no longer needed once q8..q11 are formed.
    vld1.8      {d0}, [r3]!
    vmovl.u8    q3, d0

    @ Rows 0 and 1 of this pass.
    vdup.16     q0, d6[0]
    vdup.16     q2, d6[1]
    vadd.s16    q12, q0, q8
    vadd.s16    q13, q0, q9
    vadd.s16    q14, q0, q10
    vadd.s16    q15, q0, q11
    vqmovun.s16 d0, q12
    vqmovun.s16 d1, q13
    vadd.s16    q12, q2, q8
    vadd.s16    q13, q2, q9
    vqmovun.s16 d2, q14
    vqmovun.s16 d3, q15
    vadd.s16    q14, q2, q10
    vadd.s16    q15, q2, q11
    vst1.64     {d0-d3}, [r0], r1
    vqmovun.s16 d24, q12
    vqmovun.s16 d25, q13
    vqmovun.s16 d26, q14
    vqmovun.s16 d27, q15
    vdup.16     q1, d6[2]               @ set up the next 2 rows early
    vdup.16     q2, d6[3]
    vst1.64     {d24-d27}, [r0], r1

    @ Rows 2 and 3 of this pass.
    vadd.s16    q12, q1, q8
    vadd.s16    q13, q1, q9
    vadd.s16    q14, q1, q10
    vadd.s16    q15, q1, q11
    vqmovun.s16 d0, q12
    vqmovun.s16 d1, q13
    vadd.s16    q12, q2, q8
    vadd.s16    q13, q2, q9
    vqmovun.s16 d2, q14
    vqmovun.s16 d3, q15
    vadd.s16    q14, q2, q10
    vadd.s16    q15, q2, q11
    vst1.64     {d0-d3}, [r0], r1
    vqmovun.s16 d24, q12
    vqmovun.s16 d25, q13
    vqmovun.s16 d26, q14
    vqmovun.s16 d27, q15
    vdup.16     q0, d7[0]               @ set up the next 2 rows early
    vdup.16     q2, d7[1]
    vst1.64     {d24-d27}, [r0], r1

    @ Rows 4 and 5 of this pass.
    vadd.s16    q12, q0, q8
    vadd.s16    q13, q0, q9
    vadd.s16    q14, q0, q10
    vadd.s16    q15, q0, q11
    vqmovun.s16 d0, q12
    vqmovun.s16 d1, q13
    vadd.s16    q12, q2, q8
    vadd.s16    q13, q2, q9
    vqmovun.s16 d2, q14
    vqmovun.s16 d3, q15
    vadd.s16    q14, q2, q10
    vadd.s16    q15, q2, q11
    vst1.64     {d0-d3}, [r0], r1
    vqmovun.s16 d24, q12
    vqmovun.s16 d25, q13
    vqmovun.s16 d26, q14
    vqmovun.s16 d27, q15
    vdup.16     q0, d7[2]               @ set up the next 2 rows early
    vdup.16     q2, d7[3]
    vst1.64     {d24-d27}, [r0], r1

    @ Rows 6 and 7 of this pass.
    vadd.s16    q12, q0, q8
    vadd.s16    q13, q0, q9
    vadd.s16    q14, q0, q10
    vadd.s16    q15, q0, q11
    vqmovun.s16 d0, q12
    vqmovun.s16 d1, q13
    vadd.s16    q12, q2, q8
    vadd.s16    q13, q2, q9
    vqmovun.s16 d2, q14
    vqmovun.s16 d3, q15
    vadd.s16    q14, q2, q10
    vadd.s16    q15, q2, q11
    vst1.64     {d0-d3}, [r0], r1
    vqmovun.s16 d24, q12
    vqmovun.s16 d25, q13
    vqmovun.s16 d26, q14
    vqmovun.s16 d27, q15
    vst1.64     {d24-d27}, [r0], r1

    subs        r2, r2, #1
    bgt         loop_32x32_neon

    bx          lr
    .size vpx_tm_predictor_32x32_neon, .-vpx_tm_predictor_32x32_neon

    .section    .note.GNU-stack,"",%progbits