;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_intra4x4_predict_armv6|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2


;-----------------------------------------------------------------------------
;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft,
;                                int left_stride, B_PREDICTION_MODE b_mode,
;                                unsigned char *dst, int dst_stride,
;                                unsigned char top_left)
;
; Writes a 4x4 block of predicted pixels to dst (one 32-bit store per row,
; rows dst_stride bytes apart) from the row above, the column to the left
; and the top-left pixel, according to b_mode.
;
; In (register / stack offsets are those seen AFTER the 40-byte push below):
;   r0:        *Above
;   r1:        *yleft
;   r2:        left_stride
;   r3:        b_mode
;   sp + #40:  dst
;   sp + #44:  dst_stride
;   sp + #48:  top_left
;
; b_mode dispatch (index into the branch table below):
;   0 dc, 1 tm, 2 ve, 3 he, 4 ld, 5 rd, 6 vr, 7 vl, 8 hd, 9 hu
;   any other value: return immediately without writing to dst.
;
; Clobbers: r0-r3, flags.  r4-r12 and lr are saved on entry and restored
; on exit.
;
; Memory access notes:
;   - Above[0..3] is read with a single word load; ld and vl additionally
;     word-load Above[4..7], ve byte-loads Above[4].
;   - Each output row is written with a single word store.
;   - NOTE(review): the word loads/stores presumably rely on Above/dst being
;     word aligned or on unaligned access being enabled -- confirm against
;     the callers.
;-----------------------------------------------------------------------------
|vp8_intra4x4_predict_armv6| PROC
    push        {r4-r12, lr}

    ; In ARM state pc reads as (this instruction + 8), i.e. the address of
    ; the first table entry, so pc + 4*b_mode selects the matching branch.
    ; The guard uses the unsigned condition (lo) so that a negative b_mode
    ; falls through to the default return instead of jumping backwards.
    cmp         r3, #10
    addlo       pc, pc, r3, lsl #2       ; position independent switch
    pop         {r4-r12, pc}             ; default
    b           b_dc_pred
    b           b_tm_pred
    b           b_ve_pred
    b           b_he_pred
    b           b_ld_pred
    b           b_rd_pred
    b           b_vr_pred
    b           b_vl_pred
    b           b_hd_pred
    b           b_hu_pred

; dc: fill the block with (sum(Above[0..3]) + sum(Left[0..3]) + 4) >> 3
b_dc_pred
    ; load values
    ldr         r8, [r0]                 ; Above
    ldrb        r4, [r1], r2             ; Left[0]
    mov         r9, #0
    ldrb        r5, [r1], r2             ; Left[1]
    ldrb        r6, [r1], r2             ; Left[2]
    usad8       r12, r8, r9              ; sum of the four Above bytes
    ldrb        r7, [r1]                 ; Left[3]

    ; calculate dc
    add         r4, r4, r5
    add         r4, r4, r6
    add         r4, r4, r7
    add         r4, r4, r12
    add         r4, r4, #4
    ldr         r0, [sp, #44]            ; dst_stride
    mov         r12, r4, asr #3          ; (expected_dc + 4) >> 3

    ; replicate the dc byte into all four byte lanes
    add         r12, r12, r12, lsl #8
    ldr         r3, [sp, #40]            ; dst
    add         r12, r12, r12, lsl #16

    ; store values
    str         r12, [r3], r0
    str         r12, [r3], r0
    str         r12, [r3], r0
    str         r12, [r3]

    pop         {r4-r12, pc}

; tm: pixel(r, c) = saturate_u8(Left[r] + Above[c] - top_left)
b_tm_pred
    ldr         r8, [r0]                 ; Above
    ldrb        r9, [sp, #48]            ; top_left
    ldrb        r4, [r1], r2             ; Left[0]
    ldrb        r5, [r1], r2             ; Left[1]
    ldrb        r6, [r1], r2             ; Left[2]
    ldrb        r7, [r1]                 ; Left[3]
    ldr         r0, [sp, #44]            ; dst_stride
    ldr         r3, [sp, #40]            ; dst

    add         r9, r9, r9, lsl #16      ; [tl|tl]
    uxtb16      r10, r8                  ; a[2|0]
    uxtb16      r11, r8, ror #8          ; a[3|1]
    ssub16      r10, r10, r9             ; a[2|0] - [tl|tl]
    ssub16      r11, r11, r9             ; a[3|1] - [tl|tl]

    add         r4, r4, r4, lsl #16      ; l[0|0]
    add         r5, r5, r5, lsl #16      ; l[1|1]
    add         r6, r6, r6, lsl #16      ; l[2|2]
    add         r7, r7, r7, lsl #16      ; l[3|3]

    sadd16      r1, r4, r10              ; l[0|0] + a[2|0] - [tl|tl]
    sadd16      r2, r4, r11              ; l[0|0] + a[3|1] - [tl|tl]
    usat16      r1, #8, r1
    usat16      r2, #8, r2

    sadd16      r4, r5, r10              ; l[1|1] + a[2|0] - [tl|tl]
    sadd16      r5, r5, r11              ; l[1|1] + a[3|1] - [tl|tl]

    add         r12, r1, r2, lsl #8      ; [3|2|1|0]
    str         r12, [r3], r0

    usat16      r4, #8, r4
    usat16      r5, #8, r5

    sadd16      r1, r6, r10              ; l[2|2] + a[2|0] - [tl|tl]
    sadd16      r2, r6, r11              ; l[2|2] + a[3|1] - [tl|tl]

    add         r12, r4, r5, lsl #8      ; [3|2|1|0]
    str         r12, [r3], r0

    usat16      r1, #8, r1
    usat16      r2, #8, r2

    sadd16      r4, r7, r10              ; l[3|3] + a[2|0] - [tl|tl]
    sadd16      r5, r7, r11              ; l[3|3] + a[3|1] - [tl|tl]

    add         r12, r1, r2, lsl #8      ; [3|2|1|0]

    usat16      r4, #8, r4
    usat16      r5, #8, r5

    str         r12, [r3], r0

    add         r12, r4, r5, lsl #8      ; [3|2|1|0]
    str         r12, [r3]

    pop         {r4-r12, pc}

; ve: every row is the 3-tap filtered above row,
;     (a[c-1] + 2*a[c] + a[c+1] + 2) >> 2 with a[-1] = top_left
b_ve_pred
    ldr         r8, [r0]                 ; a[3|2|1|0]
    ldr         r11, c00FF00FF
    ldrb        r9, [sp, #48]            ; top_left
    ldrb        r10, [r0, #4]            ; a[4]

    ldr         r0, c00020002

    uxtb16      r4, r8                   ; a[2|0]
    uxtb16      r5, r8, ror #8           ; a[3|1]
    ldr         r2, [sp, #44]            ; dst_stride
    pkhbt       r9, r9, r5, lsl #16      ; a[1|-1]

    add         r9, r9, r4, lsl #1       ;[a[1]+2*a[2]       | tl+2*a[0]       ]
    uxtab16     r9, r9, r5               ;[a[1]+2*a[2]+a[3]  | tl+2*a[0]+a[1]  ]
    ldr         r3, [sp, #40]            ; dst
    uxtab16     r9, r9, r0               ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2]

    add         r0, r0, r10, lsl #16     ;[a[4]+2            |                 2]
    add         r0, r0, r4, asr #16      ;[a[4]+2            |            a[2]+2]
    add         r0, r0, r5, lsl #1       ;[a[4]+2*a[3]+2     |     a[2]+2*a[1]+2]
    uadd16      r4, r4, r0               ;[a[4]+2*a[3]+a[2]+2|a[2]+2*a[1]+a[0]+2]

    and         r9, r11, r9, asr #2
    and         r4, r11, r4, asr #2
    add         r9, r9, r4, lsl #8

    ; store values
    str         r9, [r3], r2
    str         r9, [r3], r2
    str         r9, [r3], r2
    str         r9, [r3]

    pop         {r4-r12, pc}


; he: row r is filled with the 3-tap filtered left pixel,
;     (l[r-1] + 2*l[r] + l[r+1] + 2) >> 2 with l[-1] = top_left;
;     the last row uses l[3] in place of the missing l[4]
b_he_pred
    ldrb        r4, [r1], r2             ; Left[0]
    ldrb        r8, [sp, #48]            ; top_left
    ldrb        r5, [r1], r2             ; Left[1]
    ldrb        r6, [r1], r2             ; Left[2]
    ldrb        r7, [r1]                 ; Left[3]

    add         r8, r8, r4               ; tl   + l[0]
    add         r9, r4, r5               ; l[0] + l[1]
    add         r10, r5, r6              ; l[1] + l[2]
    add         r11, r6, r7              ; l[2] + l[3]

    mov         r0, #2<<14               ; rounding term, pre-shifted

    add         r8, r8, r9               ; tl   + 2*l[0] + l[1]
    add         r4, r9, r10              ; l[0] + 2*l[1] + l[2]
    add         r5, r10, r11             ; l[1] + 2*l[2] + l[3]
    add         r6, r11, r7, lsl #1      ; l[2] + 2*l[3] + l[3]


    ; shifting left by 14 leaves (sum + 2) >> 2 in bits [31:16]
    add         r8, r0, r8, lsl #14      ; (tl   + 2*l[0] + l[1])>>2 in top half
    add         r9, r0, r4, lsl #14      ; (l[0] + 2*l[1] + l[2])>>2 in top half
    add         r10,r0, r5, lsl #14      ; (l[1] + 2*l[2] + l[3])>>2 in top half
    add         r11,r0, r6, lsl #14      ; (l[2] + 2*l[3] + l[3])>>2 in top half

    pkhtb       r8, r8, r8, asr #16      ; l[-|0|-|0]
    pkhtb       r9, r9, r9, asr #16      ; l[-|1|-|1]
    pkhtb       r10, r10, r10, asr #16   ; l[-|2|-|2]
    pkhtb       r11, r11, r11, asr #16   ; l[-|3|-|3]

    ldr         r0, [sp, #44]            ; dst_stride
    ldr         r3, [sp, #40]            ; dst

    add         r8, r8, r8, lsl #8       ; l[0|0|0|0]
    add         r9, r9, r9, lsl #8       ; l[1|1|1|1]
    add         r10, r10, r10, lsl #8    ; l[2|2|2|2]
    add         r11, r11, r11, lsl #8    ; l[3|3|3|3]

    ; store values
    str         r8, [r3], r0
    str         r9, [r3], r0
    str         r10, [r3], r0
    str         r11, [r3]

    pop         {r4-r12, pc}

; ld: 3-tap filter over Above[0..7]; each row is the previous row shifted
;     by one filtered sample
b_ld_pred
    ldr         r4, [r0]                 ; Above[0-3]
    ldr         r12, c00020002
    ldr         r5, [r0, #4]             ; Above[4-7]
    ldr         lr,  c00FF00FF

    uxtb16      r6, r4                   ; a[2|0]
    uxtb16      r7, r4, ror #8           ; a[3|1]
    uxtb16      r8, r5                   ; a[6|4]
    uxtb16      r9, r5, ror #8           ; a[7|5]
    pkhtb       r10, r6, r8              ; a[2|4]
    pkhtb       r11, r7, r9              ; a[3|5]

    add         r4, r6, r7, lsl #1       ; [a2+2*a3      |      a0+2*a1]
    add         r4, r4, r10, ror #16     ; [a2+2*a3+a4   |   a0+2*a1+a2]
    uxtab16     r4, r4, r12              ; [a2+2*a3+a4+2 | a0+2*a1+a2+2]

    add         r5, r7, r10, ror #15     ; [a3+2*a4      |      a1+2*a2]
    add         r5, r5, r11, ror #16     ; [a3+2*a4+a5   |   a1+2*a2+a3]
    uxtab16     r5, r5, r12              ; [a3+2*a4+a5+2 | a1+2*a2+a3+2]

    pkhtb       r7, r9, r8, asr #16      ; a[7|6]
    add         r6, r8, r9, lsl #1       ; [a6+2*a7      |      a4+2*a5]
    uadd16      r6, r6, r7               ; [a6+2*a7+a7   |   a4+2*a5+a6]
    uxtab16     r6, r6, r12              ; [a6+2*a7+a7+2 | a4+2*a5+a6+2]

    uxth        r7, r9                   ; [                         a5]
    add         r7, r7, r8, asr #15      ; [                    a5+2*a6]
    add         r7, r7, r9, asr #16      ; [                 a5+2*a6+a7]
    uxtah       r7, r7, r12              ; [               a5+2*a6+a7+2]

    ldr         r0, [sp, #44]            ; dst_stride
    ldr         r3, [sp, #40]            ; dst

    ; scale down
    and         r4, lr, r4, asr #2
    and         r5, lr, r5, asr #2
    and         r6, lr, r6, asr #2
    mov         r7, r7, asr #2

    add         r8, r4, r5, lsl #8       ; [3|2|1|0]
    str         r8, [r3], r0

    mov         r9, r8, lsr #8
    add         r9, r9, r6, lsl #24      ; [4|3|2|1]
    str         r9, [r3], r0

    mov         r10, r9, lsr #8
    add         r10, r10, r7, lsl #24    ; [5|4|3|2]
    str         r10, [r3], r0

    mov         r6, r6, lsr #16
    mov         r11, r10, lsr #8
    add         r11, r11, r6, lsl #24    ; [6|5|4|3]
    str         r11, [r3]

    pop         {r4-r12, pc}

; rd: 3-tap filter over the edge pp[0..8] = l[3], l[2], l[1], l[0], tl,
;     Above[0..3]; each row is the previous row shifted the other way
b_rd_pred
    ldrb        r7, [r1], r2             ; l[0] = pp[3]
    ldr         lr, [r0]                 ; Above = pp[8|7|6|5]
    ldrb        r8, [sp, #48]            ; tl   = pp[4]
    ldrb        r6, [r1], r2             ; l[1] = pp[2]
    ldrb        r5, [r1], r2             ; l[2] = pp[1]
    ldrb        r4, [r1]                 ; l[3] = pp[0]


    uxtb16      r9, lr                   ; p[7|5]
    uxtb16      r10, lr, ror #8          ; p[8|6]
    add         r4, r4, r6, lsl #16      ; p[2|0]
    add         r5, r5, r7, lsl #16      ; p[3|1]
    add         r6, r6, r8, lsl #16      ; p[4|2]
    pkhbt       r7, r7, r9, lsl #16      ; p[5|3]
    pkhbt       r8, r8, r10, lsl #16     ; p[6|4]

    ldr         r12, c00020002
    ldr         lr,  c00FF00FF

    add         r4, r4, r5, lsl #1       ; [p2+2*p3      |      p0+2*p1]
    add         r4, r4, r6               ; [p2+2*p3+p4   |   p0+2*p1+p2]
    uxtab16     r4, r4, r12              ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]

    add         r5, r5, r6, lsl #1       ; [p3+2*p4      |      p1+2*p2]
    add         r5, r5, r7               ; [p3+2*p4+p5   |   p1+2*p2+p3]
    uxtab16     r5, r5, r12              ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]

    add         r6, r7, r8, lsl #1       ; [p5+2*p6      |      p3+2*p4]
    add         r6, r6, r9               ; [p5+2*p6+p7   |   p3+2*p4+p5]
    uxtab16     r6, r6, r12              ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]

    add         r7, r8, r9, lsl #1       ; [p6+2*p7      |      p4+2*p5]
    add         r7, r7, r10              ; [p6+2*p7+p8   |   p4+2*p5+p6]
    uxtab16     r7, r7, r12              ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]

    ldr         r0, [sp, #44]            ; dst_stride
    ldr         r3, [sp, #40]            ; dst

    ; scale down
    and         r7, lr, r7, asr #2
    and         r6, lr, r6, asr #2
    and         r5, lr, r5, asr #2
    and         r4, lr, r4, asr #2

    add         r8, r6, r7, lsl #8       ; [6|5|4|3]
    str         r8, [r3], r0

    mov         r9, r8, lsl #8           ; [5|4|3|-]
    uxtab       r9, r9, r4, ror #16      ; [5|4|3|2]
    str         r9, [r3], r0

    mov         r10, r9, lsl #8          ; [4|3|2|-]
    uxtab       r10, r10, r5             ; [4|3|2|1]
    str         r10, [r3], r0

    mov         r11, r10, lsl #8         ; [3|2|1|-]
    uxtab       r11, r11, r4             ; [3|2|1|0]
    str         r11, [r3]

    pop         {r4-r12, pc}

; vr: mixes 2-tap averages and 3-tap filtered samples of the edge
;     pp[1..8] = l[2], l[1], l[0], tl, Above[0..3].
;     l[3] (pp[0]) is not used by this mode, so it is not loaded.
b_vr_pred
    ldrb        r7, [r1], r2             ; l[0] = pp[3]
    ldr         lr, [r0]                 ; Above = pp[8|7|6|5]
    ldrb        r8, [sp, #48]            ; tl   = pp[4]
    ldrb        r6, [r1], r2             ; l[1] = pp[2]
    ldrb        r5, [r1]                 ; l[2] = pp[1]

    add         r5, r5, r7, lsl #16      ; p[3|1]
    add         r6, r6, r8, lsl #16      ; p[4|2]
    uxtb16      r9, lr                   ; p[7|5]
    uxtb16      r10, lr, ror #8          ; p[8|6]
    pkhbt       r7, r7, r9, lsl #16      ; p[5|3]
    pkhbt       r8, r8, r10, lsl #16     ; p[6|4]

    ldr         r4,  c00010001
    ldr         r12, c00020002
    ldr         lr,  c00FF00FF

    add         r5, r5, r6, lsl #1       ; [p3+2*p4      |      p1+2*p2]
    add         r5, r5, r7               ; [p3+2*p4+p5   |   p1+2*p2+p3]
    uxtab16     r5, r5, r12              ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]

    add         r6, r6, r7, lsl #1       ; [p4+2*p5      |      p2+2*p3]
    add         r6, r6, r8               ; [p4+2*p5+p6   |   p2+2*p3+p4]
    uxtab16     r6, r6, r12              ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]

    uadd16      r11, r8, r9              ; [p6+p7        |        p4+p5]
    uhadd16     r11, r11, r4             ; [(p6+p7+1)>>1 | (p4+p5+1)>>1]
                                         ; [F|E]

    add         r7, r7, r8, lsl #1       ; [p5+2*p6      |      p3+2*p4]
    add         r7, r7, r9               ; [p5+2*p6+p7   |   p3+2*p4+p5]
    uxtab16     r7, r7, r12              ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]

    uadd16      r2, r9, r10              ; [p7+p8        |        p5+p6]
    uhadd16     r2, r2, r4               ; [(p7+p8+1)>>1 | (p5+p6+1)>>1]
                                         ; [J|I]

    add         r8, r8, r9, lsl #1       ; [p6+2*p7      |      p4+2*p5]
    add         r8, r8, r10              ; [p6+2*p7+p8   |   p4+2*p5+p6]
    uxtab16     r8, r8, r12              ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]

    ldr         r0, [sp, #44]            ; dst_stride
    ldr         r3, [sp, #40]            ; dst

    ; scale down
    and         r5, lr, r5, asr #2       ; [B|A]
    and         r6, lr, r6, asr #2       ; [D|C]
    and         r7, lr, r7, asr #2       ; [H|G]
    and         r8, lr, r8, asr #2       ; [L|K]

    add         r12, r11, r2, lsl #8     ; [J|F|I|E]
    str         r12, [r3], r0

    add         r12, r7, r8, lsl #8      ; [L|H|K|G]
    str         r12, [r3], r0

    pkhbt       r2, r6, r2, lsl #16      ; [-|I|-|C]
    add         r2, r2, r11, lsl #8      ; [F|I|E|C]

    pkhtb       r12, r6, r5              ; [-|D|-|A]
    pkhtb       r10, r7, r5, asr #16     ; [-|H|-|B]
    str         r2, [r3], r0
    add         r12, r12, r10, lsl #8    ; [H|D|B|A]
    str         r12, [r3]

    pop         {r4-r12, pc}

; vl: mixes 2-tap averages and 3-tap filtered samples of Above[0..7]
b_vl_pred
    ldr         r4, [r0]                 ; [3|2|1|0] = Above[0-3]
    ldr         r12, c00020002
    ldr         r5, [r0, #4]             ; [7|6|5|4] = Above[4-7]
    ldr         lr,  c00FF00FF
    ldr         r2,  c00010001

    mov         r0, r4, lsr #16          ; [-|-|3|2]
    add         r0, r0, r5, lsl #16      ; [5|4|3|2]
    uxtb16      r6, r4                   ; [2|0]
    uxtb16      r7, r4, ror #8           ; [3|1]
    uxtb16      r8, r0                   ; [4|2]
    uxtb16      r9, r0, ror #8           ; [5|3]
    uxtb16      r10, r5                  ; [6|4]
    uxtb16      r11, r5, ror #8          ; [7|5]

    uadd16      r4, r6, r7               ; [p2+p3        |        p0+p1]
    uhadd16     r4, r4, r2               ; [(p2+p3+1)>>1 | (p0+p1+1)>>1]
                                         ; [B|A]

    add         r5, r6, r7, lsl #1       ; [p2+2*p3      |      p0+2*p1]
    add         r5, r5, r8               ; [p2+2*p3+p4   |   p0+2*p1+p2]
    uxtab16     r5, r5, r12              ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]

    uadd16      r6, r7, r8               ; [p3+p4        |        p1+p2]
    uhadd16     r6, r6, r2               ; [(p3+p4+1)>>1 | (p1+p2+1)>>1]
                                         ; [F|E]

    add         r7, r7, r8, lsl #1       ; [p3+2*p4      |      p1+2*p2]
    add         r7, r7, r9               ; [p3+2*p4+p5   |   p1+2*p2+p3]
    uxtab16     r7, r7, r12              ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]

    add         r8, r8, r9, lsl #1       ; [p4+2*p5      |      p2+2*p3]
    add         r8, r8, r10              ; [p4+2*p5+p6   |   p2+2*p3+p4]
    uxtab16     r8, r8, r12              ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]

    add         r9, r9, r10, lsl #1      ; [p5+2*p6      |      p3+2*p4]
    add         r9, r9, r11              ; [p5+2*p6+p7   |   p3+2*p4+p5]
    uxtab16     r9, r9, r12              ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]

    ldr         r0, [sp, #44]            ; dst_stride
    ldr         r3, [sp, #40]            ; dst

    ; scale down
    and         r5, lr, r5, asr #2       ; [D|C]
    and         r7, lr, r7, asr #2       ; [H|G]
    and         r8, lr, r8, asr #2       ; [I|D]
    and         r9, lr, r9, asr #2       ; [J|H]

    add         r10, r4, r6, lsl #8      ; [F|B|E|A]
    str         r10, [r3], r0

    add         r5, r5, r7, lsl #8       ; [H|D|G|C]
    str         r5, [r3], r0

    pkhtb       r12, r8, r4, asr #16     ; [-|I|-|B]
    pkhtb       r10, r9, r8              ; [-|J|-|D]

    add         r12, r6, r12, lsl #8     ; [I|F|B|E]
    str         r12, [r3], r0

    add         r10, r7, r10, lsl #8     ; [J|H|D|G]
    str         r10, [r3]

    pop         {r4-r12, pc}

; hd: mixes 2-tap averages and 3-tap filtered samples of the edge
;     pp[0..7] = l[3], l[2], l[1], l[0], tl, Above[0..2]
b_hd_pred
    ldrb        r7, [r1], r2             ; l[0] = pp[3]
    ldr         lr, [r0]                 ; Above = pp[8|7|6|5]
    ldrb        r8, [sp, #48]            ; tl   = pp[4]
    ldrb        r6, [r1], r2             ; l[1] = pp[2]
    ldrb        r5, [r1], r2             ; l[2] = pp[1]
    ldrb        r4, [r1]                 ; l[3] = pp[0]

    uxtb16      r9, lr                   ; p[7|5]
    uxtb16      r10, lr, ror #8          ; p[8|6]

    add         r4, r4, r5, lsl #16      ; p[1|0]
    add         r5, r5, r6, lsl #16      ; p[2|1]
    add         r6, r6, r7, lsl #16      ; p[3|2]
    add         r7, r7, r8, lsl #16      ; p[4|3]

    ldr         r12, c00020002
    ldr         lr,  c00FF00FF
    ldr         r2,  c00010001

    pkhtb       r8, r7, r9               ; p[4|5]
    pkhtb       r1, r9, r10              ; p[7|6]
    pkhbt       r10, r8, r10, lsl #16    ; p[6|5]

    uadd16      r11, r4, r5              ; [p1+p2        |        p0+p1]
    uhadd16     r11, r11, r2             ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
                                         ; [B|A]

    add         r4, r4, r5, lsl #1       ; [p1+2*p2      |      p0+2*p1]
    add         r4, r4, r6               ; [p1+2*p2+p3   |   p0+2*p1+p2]
    uxtab16     r4, r4, r12              ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]

    uadd16      r0, r6, r7               ; [p3+p4        |        p2+p3]
    uhadd16     r0, r0, r2               ; [(p3+p4+1)>>1 | (p2+p3+1)>>1]
                                         ; [F|E]

    add         r5, r6, r7, lsl #1       ; [p3+2*p4      |      p2+2*p3]
    add         r5, r5, r8, ror #16      ; [p3+2*p4+p5   |   p2+2*p3+p4]
    uxtab16     r5, r5, r12              ; [p3+2*p4+p5+2 | p2+2*p3+p4+2]

    add         r6, r12, r8, ror #16     ; [p5+2         |         p4+2]
    add         r6, r6, r10, lsl #1      ; [p5+2+2*p6    |    p4+2+2*p5]
    uxtab16     r6, r6, r1               ; [p5+2+2*p6+p7 | p4+2+2*p5+p6]

    ; scale down
    and         r4, lr, r4, asr #2       ; [D|C]
    and         r5, lr, r5, asr #2       ; [H|G]
    and         r6, lr, r6, asr #2       ; [J|I]

    ldr         lr, [sp, #44]            ; dst_stride
    ldr         r3, [sp, #40]            ; dst

    pkhtb       r2, r0, r6               ; [-|F|-|I]
    pkhtb       r12, r6, r5, asr #16     ; [-|J|-|H]
    add         r12, r12, r2, lsl #8     ; [F|J|I|H]
    add         r2, r0, r5, lsl #8       ; [H|F|G|E]
    mov         r12, r12, ror #24        ; [J|I|H|F]
    str         r12, [r3], lr

    mov         r7, r11, asr #16         ; [-|-|-|B]
    str         r2, [r3], lr
    add         r7, r7, r0, lsl #16      ; [-|E|-|B]
    add         r7, r7, r4, asr #8       ; [-|E|D|B]
    add         r7, r7, r5, lsl #24      ; [G|E|D|B]
    str         r7, [r3], lr

    add         r5, r11, r4, lsl #8      ; [D|B|C|A]
    str         r5, [r3]

    pop         {r4-r12, pc}



; hu: mixes 2-tap averages and 3-tap filtered samples of Left[0..3];
;     positions past the end of the column are filled with Left[3]
b_hu_pred
    ldrb        r4, [r1], r2             ; Left[0]
    ldr         r12, c00020002
    ldrb        r5, [r1], r2             ; Left[1]
    ldr         lr,  c00FF00FF
    ldrb        r6, [r1], r2             ; Left[2]
    ldr         r2,  c00010001
    ldrb        r7, [r1]                 ; Left[3]

    add         r4, r4, r5, lsl #16      ; [1|0]
    add         r5, r5, r6, lsl #16      ; [2|1]
    add         r9, r6, r7, lsl #16      ; [3|2]

    uadd16      r8, r4, r5               ; [p1+p2        |        p0+p1]
    uhadd16     r8, r8, r2               ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
                                         ; [B|A]

    add         r4, r4, r5, lsl #1       ; [p1+2*p2      |      p0+2*p1]
    add         r4, r4, r9               ; [p1+2*p2+p3   |   p0+2*p1+p2]
    uxtab16     r4, r4, r12              ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
    ldr         r2, [sp, #44]            ; dst_stride
    ldr         r3, [sp, #40]            ; dst
    and         r4, lr, r4, asr #2       ; [D|C]

    add         r10, r6, r7              ; [p2+p3]
    add         r11, r10, r7, lsl #1     ; [p2+3*p3]
    add         r10, r10, #1
    add         r11, r11, #2
    mov         r10, r10, asr #1         ; [E]
    mov         r11, r11, asr #2         ; [F]

    add         r9, r7, r9, asr #8       ; [-|-|G|G]
    add         r0, r8, r4, lsl #8       ; [D|B|C|A]
    add         r7, r9, r9, lsl #16      ; [G|G|G|G]

    str         r0, [r3], r2

    mov         r1, r8, asr #16          ; [-|-|-|B]
    add         r1, r1, r4, asr #8       ; [-|-|D|B]
    add         r1, r1, r10, lsl #16     ; [-|E|D|B]
    add         r1, r1, r11, lsl #24     ; [F|E|D|B]
    str         r1, [r3], r2

    add         r10, r10, r11, lsl #8    ; [-|-|F|E]
    add         r10, r10, r9, lsl #16    ; [G|G|F|E]
    str         r10, [r3], r2

    str         r7, [r3]

    pop         {r4-r12, pc}

    ENDP

; constants (loaded pc-relative by the handlers above)
c00010001
    DCD         0x00010001               ; +1 in each halfword: 2-tap rounding
c00020002
    DCD         0x00020002               ; +2 in each halfword: 3-tap rounding
c00FF00FF
    DCD         0x00FF00FF               ; keeps the low byte of each halfword

    END