/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv6/ |
D | walsh_v6.asm |
    35  ldrd r8, r9, [r0], r2
    42  qadd16 r7, r8, r9 ; [d1|a1] [9+11 | 8+10]
    43  qsub16 r8, r8, r9 ; [c1|b1] [9-11 | 8-10]
    46  qadd16 r9, r10, r11 ; [d1|a1] [13+15 | 12+14]
    65  lsls r2, r9, #16
    66  smuad r2, r9, lr ; D0 = a1<<2 + d1<<2
   106  smusd r9, r9, lr ; D3 = a1<<2 - d1<<2
   107  add r7, r5, r9 ; d1_3 = B3 + D3
   108  sub r5, r5, r9 ; c1_3 = B3 - D3
   113  adds r9, r3, r5 ; b2 = b1_3 + c1_3
   [all …]
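The qadd16/qsub16 pairs here compute the horizontal butterfly of the forward 4x4 Walsh-Hadamard transform on two packed 16-bit coefficients per instruction, and the smuad/smusd against a packed constant fold in the <<2 scaling. A scalar C sketch of one row, using the a1/b1/c1/d1 names from the comments (illustrative, not the full two-pass routine):

    /* One row of the forward 4x4 WHT; the comments above show row 2,
     * where a1 = ip[8]+ip[10] and d1 = ip[9]+ip[11]. */
    void wht_row(const short *ip, int *out)
    {
        int a1 = ip[0] + ip[2];
        int d1 = ip[1] + ip[3];
        int c1 = ip[1] - ip[3];
        int b1 = ip[0] - ip[2];

        out[0] = (a1 << 2) + (d1 << 2);   /* D0, the smuad at line 66  */
        out[1] = (b1 << 2) + (c1 << 2);
        out[2] = (b1 << 2) - (c1 << 2);
        out[3] = (a1 << 2) - (d1 << 2);   /* D3, the smusd at line 106 */
    }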
|
D | vp8_subtract_armv6.asm |
    29  stmfd sp!, {r4-r9}
    40  mov r9, #4 ; loop count
    55  subs r9, r9, #1 ; decrement loop counter
    66  ldmfd sp!, {r4-r9}
    94  uxtb16 r9, r7 ; [p2 | p0] (A)
    98  usub16 r6, r8, r9 ; [d2 | d0] (A)
   105  pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
   109  str r9, [r0], #4 ; diff (A)
   111  uxtb16 r9, r11 ; [p2 | p0] (B)
   115  usub16 r6, r8, r9 ; [d2 | d0] (B)
   [all …]
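uxtb16 splits a word of four pixels into its even bytes ([p2 | p0]) and, via a rotated second uxtb16, its odd bytes, so one usub16 yields two 16-bit differences; pkhbt/pkhtb then repack them for the diff buffer. The scalar equivalent (hypothetical helper, widths as in VP8's subtract):

    #include <stdint.h>

    /* diff = src - pred, widened from 8-bit pixels to 16-bit residuals */
    void subtract_block(int16_t *diff, const uint8_t *src, int src_stride,
                        const uint8_t *pred, int pred_stride,
                        int rows, int cols)
    {
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++)
                diff[c] = (int16_t)(src[c] - pred[c]);
            diff += cols;
            src  += src_stride;
            pred += pred_stride;
        }
    }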
|
D | vp8_short_fdct4x4_armv6.asm |
    49  ldrd r8, r9, [r0] ; [i5 | i4] [i7 | i6]
    57  ror r9, r9, #16 ; [i6 | i7]
    59  qadd16 r6, r8, r9 ; [i5+i6 | i4+i7] = [b1 | a1] without shift
    60  qsub16 r7, r8, r9 ; [i5-i6 | i4-i7] = [c1 | d1] without shift
    68  smuad r9, r6, lr ; o4 = (i5+i6)*8 + (i4+i7)*8
    76  pkhbt r9, r9, r6, lsl #4 ; [o5 | o4], keep in register for PART 2
   134  qadd16 r5, r9, r2 ; b1 = [i5+i9 | i4+i8]
   135  qsub16 r6, r9, r2 ; c1 = [i5-i9 | i4-i8]
   149  lsl r9, r3, #16 ; prepare bottom halfword for scaling
   152  pkhtb r5, r3, r9, asr #20 ; pack and scale bottom halfword
   [all …]
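The qadd16/qsub16 plus smuad lines are the first fDCT pass done two coefficients at a time; "o4 = (i5+i6)*8 + (i4+i7)*8" is row 1's DC term with the <<3 scaling folded into the packed multiplier. A scalar sketch of one row pass, with constants as I recall them from VP8's C reference fdct (verify against vp8/encoder/dct.c):

    /* One row of VP8's forward 4x4 DCT: a1/b1 feed the even outputs,
     * c1/d1 the odd ones through the 2217/5352 rotation. */
    void fdct_row(const short *ip, short *op)
    {
        int a1 = (ip[0] + ip[3]) << 3;
        int b1 = (ip[1] + ip[2]) << 3;
        int c1 = (ip[1] - ip[2]) << 3;
        int d1 = (ip[0] - ip[3]) << 3;

        op[0] = (short)(a1 + b1);                           /* o4 above */
        op[2] = (short)(a1 - b1);
        op[1] = (short)((c1 * 2217 + d1 * 5352 + 14500) >> 12);
        op[3] = (short)((d1 * 2217 - c1 * 5352 +  7500) >> 12);
    }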
|
D | vp8_fast_quantize_b_armv6.asm |
    43  ldr r9, [r3], #4 ; [z1 | z0]
    47  ssat16 lr, #1, r9 ; [sz1 | sz0]
    48  eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
    49  ssub16 r9, r9, lr ; x = (z ^ sz) - sz
    50  sadd16 r9, r9, r10 ; [x1+r1 | x0+r0]
    54  smulbb r0, r9, r11 ; [(x0+r0)*q0]
    55  smultt r9, r9, r11 ; [(x1+r1)*q1]
    61  pkhtb r0, r9, r0, asr #16 ; [y1 | y0]
    62  ldr r9, [r4], #4 ; [q3 | q2]
    69  smulbb r10, r12, r9 ; [(x2+r2)*q2]
   [all …]
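ssat16 with a width of 1 saturates each halfword to [-1, 0], which is exactly the per-lane sign mask sz; the eor/ssub16 pair then computes x = (z ^ sz) - sz = |z|, and smulbb/smultt form the (x + round) * quant products. One coefficient of that path in scalar C (the sign restore happens the same way later in the routine):

    /* Fast quantize for one coefficient, as in the comments above. */
    short fast_quantize_coeff(short z, short round, short quant)
    {
        short sz = (short)(z >> 15);          /* 0 or -1: sign mask      */
        short x  = (short)((z ^ sz) - sz);    /* x = (z ^ sz) - sz = |z| */
        short y  = (short)(((x + round) * quant) >> 16);
        return (short)((y ^ sz) - sz);        /* re-apply the sign       */
    }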
|
D | vp8_mse16x16_armv6.asm |
    29  push {r4-r9, lr}
    47  usub8 r9, r6, r5 ; calculate difference with reversed operands
    49  sel r8, r9, lr ; select bytes with negative difference
    69  usub8 r9, r6, r5 ; calculate difference with reversed operands
    70  sel r8, r9, lr ; select bytes with negative difference
    88  usub8 r9, r6, r5 ; calculate difference with reversed operands
    89  sel r8, r9, lr ; select bytes with negative difference
   110  usub8 r9, r6, r5 ; calculate difference with reversed operands
   112  sel r8, r9, lr ; select bytes with negative difference
   134  pop {r4-r9, pc}
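usub8 subtracts bytewise and sets the per-byte GE flags; doing the subtraction in both operand orders and using sel to pick the lane that did not borrow yields |a - b| per byte with no branches. What one such step contributes to the MSE sum, as a scalar model (hypothetical helper name):

    #include <stdint.h>

    /* Scalar model of the usub8 / reversed-usub8 / sel idiom over 4 bytes. */
    uint32_t sse_accumulate_4(const uint8_t *a, const uint8_t *b,
                              uint32_t sse)
    {
        for (int i = 0; i < 4; i++) {
            int d = a[i] - b[i];      /* sel keeps whichever direction */
            sse += (uint32_t)(d * d); /* of the subtract is positive   */
        }
        return sse;
    }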
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/ |
D | filter_v6.asm |
    55  ldrb r9, [r0, #-1]
    62  pkhbt lr, r8, r9, lsl #16 ; r9 | r8
    63  pkhbt r8, r9, r10, lsl #16 ; r10 | r9
    65  ldrb r9, [r0]
    70  pkhbt r11, r11, r9, lsl #16 ; r9 | r11
    79  pkhbt r9, r9, r10, lsl #16 ; r10 | r9
    82  smlad lr, r9, r6, lr
    91  ldrneb r9, [r0, #-1]
   145  ldrb r9, [r0, #-1]
   152  pkhbt lr, r8, r9, lsl #16 ; r9 | r8
   [all …]
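The pkhbt packing lets smlad evaluate two taps of the 6-tap subpel filter per instruction (dual 16-bit multiply-accumulate), so the six products cost three MACs. One output pixel in scalar form, assuming the usual VP8 tap layout (src[-2..3], taps summing to 128):

    /* One 6-tap filtered pixel: three smlad's worth of products. */
    int sixtap_pixel(const unsigned char *src, const short *filter)
    {
        int acc = 0;
        for (int k = -2; k <= 3; k++)
            acc += src[k] * filter[k + 2];
        acc = (acc + 64) >> 7;            /* round, VP8_FILTER_SHIFT = 7 */
        if (acc < 0)   acc = 0;           /* clamp to pixel range        */
        if (acc > 255) acc = 255;
        return acc;
    }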
|
D | dequant_idct_v6.asm |
    65  smulwt r9, r3, r6
    69  pkhbt r7, r7, r9, lsl #16
    74  smulwb r9, r3, r12
    77  pkhbt r9, r9, r11, lsl #16
    80  uadd16 r7, r12, r9
    85  uadd16 r9, r10, r6
    93  str r9, [r1], #4
   102  ldr r9, [r0], #4
   112  pkhbt lr, r9, r7, lsl #16
   115  pkhtb r8, r7, r9, asr #16
   [all …]
|
D | intra4x4_predict_v6.asm |
    54  mov r9, #0
    57  usad8 r12, r8, r9
    83  ldrb r9, [sp, #48] ; top_left
    91  add r9, r9, r9, lsl #16 ; [tl|tl]
    94  ssub16 r10, r10, r9 ; a[2|0] - [tl|tl]
    95  ssub16 r11, r11, r9 ; a[3|1] - [tl|tl]
   143  ldrb r9, [sp, #48] ; top_left
   151  pkhbt r9, r9, r5, lsl #16 ; a[1|-1]
   153  add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ]
   154  uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ]
   [all …]
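The [tl|tl] ssub16 lines are the TM ("true motion") predictor: each output pixel is above[c] + left[r] - top_left, clamped to byte range, and the packed subtract precomputes above[c] - top_left two columns at a time. Scalar sketch:

    /* TM_PRED for a 4x4 block: pred = above + left - top_left, clamped. */
    void tm_predict_4x4(unsigned char *dst, int stride,
                        const unsigned char *above,
                        const unsigned char *left, int top_left)
    {
        for (int r = 0; r < 4; r++) {
            for (int c = 0; c < 4; c++) {
                int v = above[c] + left[r] - top_left;
                dst[c] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
            }
            dst += stride;
        }
    }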
|
D | loopfilter_v6.asm |
    71  ldr r9, [src], pstep ; p3
    90  uqsub8 r6, r9, r10 ; p3 - p2
    91  uqsub8 r7, r10, r9 ; p2 - p3
   102  ldr r9, [src], pstep ; q0
   111  uqsub8 r11, r12, r9 ; p0 - q0
   112  uqsub8 r12, r9, r12 ; q0 - p0
   119  uqsub8 r7, r9, r10 ; q0 - q1
   121  uqsub8 r6, r10, r9 ; q1 - q0
   123  uqsub8 r9, r11, r10 ; q2 - q1
   130  orr r10, r9, r10 ; abs (q2-q1)
   [all …]
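uqsub8 saturates at zero, so only the positive direction of each bytewise subtract survives; OR-ing uqsub8(a,b) with uqsub8(b,a) therefore gives |a - b| per byte, and the filter mask combines such terms before comparing against the limit. Scalar model of the idiom (helper names are illustrative):

    #include <stdint.h>

    /* |a - b| per byte via two saturating subtracts, as in the
     * uqsub8/uqsub8/orr triples above. */
    static uint8_t abs_diff_u8(uint8_t a, uint8_t b)
    {
        uint8_t d0 = (uint8_t)(a > b ? a - b : 0);  /* uqsub8 a, b */
        uint8_t d1 = (uint8_t)(b > a ? b - a : 0);  /* uqsub8 b, a */
        return (uint8_t)(d0 | d1);
    }

    /* one term of the filter mask: the edge stays unfiltered if any
     * neighbour difference exceeds the limit */
    int mask_term(uint8_t p3, uint8_t p2, uint8_t limit)
    {
        return abs_diff_u8(p3, p2) <= limit;
    }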
|
D | bilinearfilter_v6.asm |
    56  ldrb r9, [r0, #3]
    63  pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2]
    65  pkhbt r9, r9, r10, lsl #16 ; src[4] | src[3]
    68  smuad r9, r9, r5
    82  add r9, r9, #0x40
    84  usat r9, #16, r9, asr #7
    89  strh r9, [r1], r3
    99  add r9, r2, r4, lsl #1 ; adding back block width
   100  pld [r0, r9] ; preload next row
   117  ldrb r9, [r0, #3]
   [all …]
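smuad forms src[i]*f0 + src[i+1]*f1 from the packed halfwords, and the add #0x40 / asr #7 pair is the usual round-then-shift for VP8 filters whose taps sum to 128; usat keeps the first-pass intermediate within 16 bits. One tap in scalar form:

    /* One first-pass bilinear output value. */
    unsigned short bilinear_tap(const unsigned char *src, const short *f)
    {
        int v = src[0] * f[0] + src[1] * f[1];   /* the smuad        */
        v = (v + 0x40) >> 7;                     /* round, then >> 7 */
        if (v < 0)      v = 0;                   /* the usat #16     */
        if (v > 0xFFFF) v = 0xFFFF;
        return (unsigned short)v;
    }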
|
D | simpleloopfilter_v6.asm |
    69  mov r9, #4 ; double the count. we're doing 4 at a time
   173  pkhbt r9, r3, r4, lsl #16
   176  ;transpose r7, r8, r9, r10 to r3, r4, r5, r6
   177  TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6
   181  uqsub8 r9, r4, r5 ; p0 - q0
   184  orr r9, r9, r10 ; abs(p0 - q0)
   186  uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
   188  uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
   207  ldr r9, c0x03030303 ; r9 = 3
   216  qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3
   [all …]
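Lines 181-188 build the simple filter's gate value |p0-q0|*2 + |p1-q1|/2 entirely with saturating byte ops, and line 216 forms Filter2 = vp8_filter + 3 for the subsequent >>3 update. The gate in scalar C (threshold named blimit, as in the C reference):

    /* Filter the edge only when the combined difference is small. */
    int simple_filter_mask(unsigned char p1, unsigned char p0,
                           unsigned char q0, unsigned char q1,
                           unsigned char blimit)
    {
        int d0 = p0 > q0 ? p0 - q0 : q0 - p0;    /* |p0 - q0| */
        int d1 = p1 > q1 ? p1 - q1 : q1 - p1;    /* |p1 - q1| */
        return d0 * 2 + d1 / 2 <= blimit;
    }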
|
D | iwalsh_v6.asm |
    31  ldr r9, [r0, #28] ; [15 | 14]
    43  qadd16 r10, r3, r9 ; a1 [3+15 | 2+14]
    46  qsub16 lr, r3, r9 ; d1 [3-15 | 2-14]
    51  qsub16 r9, lr, r12 ; d1 - c1 [15 | 14]
    95  qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15]
    96  qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15]
   101  qaddsubx r9, r5, r4 ; [a2|d2] [b1+a1 | d1-c1]
   106  qadd16 r9, r9, r10 ; [a2+3|d2+3]
   119  asr r12, r9, #19 ; [12]
   124  sxth r9, r9
   [all …]
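qsubaddx/qaddsubx are the cross add/sub forms used for the second-pass butterfly, and the [a2+3|d2+3] / asr #19 lines apply the final (x + 3) >> 3 rounding to both packed halfwords at once (#19 = 16 to unpack the top half plus the 3 of the rounding shift). A scalar sketch of the second pass, reconstructed from the comments rather than copied from the source:

    /* One row of the inverse WHT second pass, with (x + 3) >> 3 rounding. */
    void iwht_row(const short *ip, short *op)
    {
        int a1 = ip[0] + ip[3];
        int b1 = ip[1] + ip[2];
        int c1 = ip[1] - ip[2];
        int d1 = ip[0] - ip[3];

        op[0] = (short)((a1 + b1 + 3) >> 3);
        op[1] = (short)((c1 + d1 + 3) >> 3);
        op[2] = (short)((a1 - b1 + 3) >> 3);
        op[3] = (short)((d1 - c1 + 3) >> 3);
    }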
|
D | idct_v6.asm |
    42  smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16
    48  pkhtb r7, r9, r7, asr #16 ; 5c | 4c
    53  smulbb r9, r5, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16
    58  pkhtb r9, r11, r9, asr #16 ; 13c | 12c
    61  uadd16 r7, r12, r9 ; 13c+13 | 12c+12
    68  uadd16 r9, r10, r6 ; a+d
    77  str r9, [r0], #4 ; o1 | o0
    91  smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16
    97  pkhtb r7, r7, r9, asr #16 ; 1c | 5c
   102  pkhbt r9, r14, r12, lsl #16 ; i2 | i6
   [all …]
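cospi8sqrt2minus1 and its partner sinpi8sqrt2 are the two fixed-point rotation constants of VP8's 4x4 IDCT (20091 and 35468 in the C reference, both Q16). Multiplying directly by cos(pi/8)*sqrt(2) ≈ 1.3066 would not fit a 16-bit constant, so the code multiplies by the constant minus one and adds x back, which is what the "13c+13" style comments record:

    static const int cospi8sqrt2minus1 = 20091; /* cos(pi/8)*sqrt(2)-1, Q16 */
    static const int sinpi8sqrt2      = 35468;  /* sin(pi/8)*sqrt(2),   Q16 */

    /* The 'c' values in the comments (4c, 5c, 12c, 13c) are these products. */
    int idct_mul_cos(int x) { return x + ((x * cospi8sqrt2minus1) >> 16); }
    int idct_mul_sin(int x) { return (x * sinpi8sqrt2) >> 16; }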
|
D | sixtappredict8x4_v6.asm |
    56  ldrb r9, [r0, #-2]
    64  pkhbt r8, r8, r9, lsl #16 ; r9 | r8
    65  pkhbt r9, r9, r10, lsl #16 ; r10 | r9
    75  smlad r12, r9, r4, r12
    77  pkhbt r10, r10, r6, lsl #16 ; r10 | r9
    97  movne r7, r9
    99  movne r9, r11
   104  ;;add r9, ppl, #30 ; attempt to load 2 adjacent cache lines
   107  ;;pld [src, r9]
   157  ldrh r9, [sp, #12]
   [all …]
|
D | dequantize_v6.asm |
    21  stmdb sp!, {r4-r9, lr}
    33  smulbb r9, r5, r6
    45  strh r9, [r2], #2
    46  smulbb r9, r5, r6
    60  strh r9, [r2], #2
    66  ldmia sp!, {r4-r9, pc}
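smulbb/strh is a plain element-wise multiply: each quantized coefficient is scaled by its dequantization factor and stored back as a halfword. Scalar equivalent (parameter names are illustrative):

    /* Dequantize a 4x4 block: DQ[i] = Q[i] * factor[i]. */
    void dequantize_block(const short *q, const short *factor, short *dq)
    {
        for (int i = 0; i < 16; i++)
            dq[i] = (short)(q[i] * factor[i]);
    }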
|
D | vp8_variance16x16_armv6.asm |
    46  usub8 r9, r5, r4 ; calculate difference with reversed operands
    48  sel r6, r9, lr ; select bytes with negative difference
    70  usub8 r9, r5, r4 ; calculate difference with reversed operands
    71  sel r6, r9, lr ; select bytes with negative difference
    94  usub8 r9, r5, r4 ; calculate difference with reversed operands
    95  sel r6, r9, lr ; select bytes with negative difference
   119  usub8 r9, r5, r4 ; calculate difference with reversed operands
   121  sel r6, r9, lr ; select bytes with negative difference
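Same usub8/sel absolute-difference idiom as the MSE code above, but variance also accumulates the signed sum so the squared mean can be removed at the end: for a 16x16 block, variance = sse - sum^2/256. Scalar sketch:

    #include <stdint.h>

    /* 16x16 variance: accumulate sum and sse, subtract the DC term. */
    uint32_t variance_16x16(const uint8_t *src, int src_stride,
                            const uint8_t *ref, int ref_stride,
                            uint32_t *sse)
    {
        int64_t sum = 0;
        uint32_t s = 0;
        for (int r = 0; r < 16; r++) {
            for (int c = 0; c < 16; c++) {
                int d = src[c] - ref[c];
                sum += d;
                s   += (uint32_t)(d * d);
            }
            src += src_stride;
            ref += ref_stride;
        }
        *sse = s;
        return s - (uint32_t)((sum * sum) >> 8);   /* sse - sum^2/256 */
    }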
|
D | vp8_sad16x16_armv6.asm |
    41  ldr r9, [r2, #0x4] ; load 4 ref pixels (1A)
    46  usad8 r8, r7, r9 ; calculate sad for 4 pixels
    66  ldr r9, [r2, #0x4] ; load 4 ref pixels (2A)
    71  usad8 r8, r7, r9 ; calculate sad for 4 pixels
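usad8 sums the absolute differences of four byte lanes in a single instruction, so each 16-pixel row costs four word loads per side and four usad8s. Scalar model of one such step:

    #include <stdint.h>

    /* What one usad8 contributes: SAD over 4 bytes. */
    uint32_t sad_4(const uint8_t *src, const uint8_t *ref)
    {
        uint32_t sad = 0;
        for (int i = 0; i < 4; i++)
            sad += (uint32_t)(src[i] > ref[i] ? src[i] - ref[i]
                                              : ref[i] - src[i]);
        return sad;
    }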
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_copyframe_func_neon.asm |
    32  ldr r9, [r1, #yv12_buffer_config_u_buffer] ;srcptr1
    44  str r9, [sp, #4]
    53  mov r9, r3
    71  vst1.8 {q0, q1}, [r9]!
    73  vst1.8 {q2, q3}, [r9]!
    75  vst1.8 {q4, q5}, [r9]!
    77  vst1.8 {q6, q7}, [r9]!
   113  mov r9, r3
   127  vst1.8 {q0, q1}, [r9]!
   129  vst1.8 {q2, q3}, [r9]!
   [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv5te/ |
D | boolhuff_armv5te.asm |
    98  mov r9, #0
    99  strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
   109  ldrb r9, [r7, r4] ; w->buffer[x]
   110  add r9, r9, #1
   111  strb r9, [r7, r4] ; w->buffer[x] + 1
   114  ldr r9, [r0, #vp8_writer_buffer]
   124  VALIDATE_POS r9, r1 ; validate_buffer at pos
   126  strb r7, [r9, r4] ; w->buffer[w->pos++]
   174  mov r9, #0
   175  strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
   [all …]
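Lines 98-111 are the carry propagation of the boolean (arithmetic) coder: when the low-value register overflows, walk backwards through any 0xFF bytes already written, zeroing them, and increment the first byte that can absorb the carry. A sketch in C, with names mirroring the w->buffer / w->pos comments:

    /* Propagate an arithmetic-coder carry back through the output. */
    static void propagate_carry(unsigned char *buffer, int pos)
    {
        int x = pos - 1;
        while (buffer[x] == 0xFF) {  /* 0xFF cannot absorb a carry      */
            buffer[x] = 0;           /* w->buffer[x] = (unsigned char)0 */
            x--;
        }
        buffer[x] += 1;              /* w->buffer[x] + 1                */
    }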
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/ppc/ |
D | filter_altivec.asm |
    24  load_c \V0, HFilter, r5, r9, r10
    49  lvx v10, 0, r9 ;# v10..v14 = first 5 rows
    50  lvx v11, r10, r9
    51  addi r9, r9, 32
    52  lvx v12, 0, r9
    53  lvx v13, r10, r9
    54  addi r9, r9, 32
    55  lvx v14, 0, r9
   119  addi r9, r9, 16 ;# P5 = newest input row
   120  lvx \P5, 0, r9
   [all …]
|
D | loopfilter_filters_altivec.asm |
   682  la r9, -48(r1) ;# temporary space for reading in vectors
   685  RLVmb v0, r9
   686  RLVmb v1, r9
   687  RLVmb v2, r9
   688  RLVmb v3, r9
   689  RLVmb v4, r9
   690  RLVmb v5, r9
   691  RLVmb v6, r9
   692  RLVmb v7, r9
   705  WLVmb v17, r9
   [all …]
|
D | sad_altivec.asm |
   157  li r9, 8
   158  mtctr r9
   177  li r9, 4
   178  mtctr r9
   197  li r9, 8
   198  mtctr r9
   217  li r9, 4
   218  mtctr r9
   236  lwz r9, 0(\I)
   241  stw r9, 12(r1)
|
D | variance_altivec.asm |
   162  li r9, 4
   163  mtctr r9
   269  li r9, 8
   270  mtctr r9
   290  li r9, 8
   291  mtctr r9
   311  li r9, 4
   312  mtctr r9
   330  lwz r9, 0(\I)
   335  stw r9, 12(r1)
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/ppc/ |
D | encodemb_altivec.asm |
    25  li r9, 256
    26  add r3, r3, r9
    27  add r3, r3, r9
    28  add r6, r6, r9
    31  li r9, 4
    32  mtctr r9
    71  mtctr r9
|
D | fdct_altivec.asm |
    36  load_c v0, dct_tab, 0, r9, r10
    42  load_c v4, ppc_dctperm_tab, 0, r9, r10
    43  load_c v5, ppc_dctperm_tab, r6, r9, r10
    45  load_c v6, round_tab, 0, r10, r9
   125  lvx v6, r6, r9 ;# v6 = Vround
   154  lvx v6, r6, r9 ;# v6 = Vround
   165  lvx v6, 0, r9 ;# v6 = Hround
   176  lvx v6, r6, r9 ;# v6 = Vround
|