/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_copysrcframe_func_neon.asm | 50 mov r12, r5 61 sub r12, r12, #128 62 cmp r12, #128 73 cmp r12, #0 79 sub r12, r12, #8 80 cmp r12, #8 85 cmp r12, #0 90 subs r12, r12, #1 107 mov r12, r5 114 sub r12, r12, #128 [all …]
|
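In vp8_vpxyv12_copysrcframe_func_neon.asm the r12 hits above are the per-row byte counter: it is loaded with the row width, consumed 128 bytes at a time by the wide NEON loop, then 8 bytes at a time, then one byte at a time for the tail. A rough C sketch of that row structure; copy_row and its parameters are illustrative names, not libvpx API:

#include <string.h>

/* Sketch of the chunked per-row copy implied by the counters above:
 * 128-byte NEON iterations, then 8-byte iterations, then a byte tail. */
static void copy_row(unsigned char *dst, const unsigned char *src, int width)
{
    int remaining = width;               /* plays the role of r12 */

    while (remaining >= 128) {           /* wide NEON iterations */
        memcpy(dst, src, 128);
        src += 128; dst += 128; remaining -= 128;
    }
    while (remaining >= 8) {             /* 8-byte iterations */
        memcpy(dst, src, 8);
        src += 8; dst += 8; remaining -= 8;
    }
    while (remaining > 0) {              /* byte tail */
        *dst++ = *src++; remaining--;
    }
}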
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/neon/ |
D | buildintrapredictorsmby_neon.asm | 44 mov r12, #128 45 vdup.u8 q0, r12 48 mov r12, #0 67 add r12, r4, r6 84 add r12, r12, r3 85 add r12, r12, r4 86 add r12, r12, r5 87 add r12, r12, r6 94 add r12, r12, r3 95 add r12, r12, r4 [all …]
|
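buildintrapredictorsmby_neon.asm is the 16x16 luma intra predictor. The mov r12, #128 / vdup.u8 q0, r12 pair is the fill value used when neither the above row nor the left column is available, and the later chains of adds into r12 accumulate the left-column pixels for the DC average. A minimal scalar sketch of the DC case, with illustrative names (not the libvpx API):

#include <stdint.h>

/* 16x16 DC intra prediction: fill with 128 when no neighbours are
 * available, otherwise fill with the rounded average of the above row
 * and/or left column. */
static void dc_predict_16x16(uint8_t *dst, int stride,
                             const uint8_t *above, const uint8_t *left,
                             int have_above, int have_left)
{
    int sum = 0, count = 0, i, j;
    uint8_t dc = 128;                     /* default when no neighbours */

    if (have_above) { for (i = 0; i < 16; i++) sum += above[i]; count += 16; }
    if (have_left)  { for (i = 0; i < 16; i++) sum += left[i];  count += 16; }

    if (count)
        dc = (uint8_t)((sum + (count >> 1)) / count);   /* rounded average */

    for (i = 0; i < 16; i++)
        for (j = 0; j < 16; j++)
            dst[i * stride + j] = dc;
}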
D | mbloopfilter_neon.asm | 32 ldr r12, [sp, #4] ; load thresh 34 vdup.u8 q2, r12 ; thresh 35 add r12, r0, r1, lsr #1 ; move src pointer up by 1 line 38 vld1.u8 {q4}, [r12@128], r1 ; p2 40 vld1.u8 {q6}, [r12@128], r1 ; p0 42 vld1.u8 {q8}, [r12@128], r1 ; q1 44 vld1.u8 {q10}, [r12@128], r1 ; q3 48 sub r12, r12, r1, lsl #2 49 add r0, r12, r1, lsr #1 51 vst1.u8 {q4}, [r12@128],r1 ; store op2 [all …]
|
D | loopfiltersimpleverticaledge_neon.asm | 26 add r12, r1, r1 29 vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r12 30 vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r3], r12 31 vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r12 32 vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r3], r12 33 vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r12 34 vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r3], r12 35 vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r12 36 vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r3], r12 38 vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r12 [all …]
|
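loopfiltersimpleverticaledge_neon.asm filters a vertical edge, so the pixels that belong together sit in the same row. The vld4.8 lane loads above gather the four bytes straddling the edge from one row into lane k of four vectors, and r12 = 2 * pitch because even and odd rows are read through the two alternating pointers r0 and r3. A scalar picture of that gather, with illustrative names:

#include <stdint.h>

/* Column-wise gather done by the vld4.8 lane loads above: for each row,
 * read the two pixels on each side of a vertical edge into per-column
 * arrays p1/p0/q0/q1.  src is assumed to point at the edge column. */
static void gather_vertical_edge(const uint8_t *src, int pitch,
                                 uint8_t p1[16], uint8_t p0[16],
                                 uint8_t q0[16], uint8_t q1[16])
{
    int row;
    for (row = 0; row < 16; row++) {
        const uint8_t *s = src + row * pitch - 2;  /* 2 pixels left of the edge */
        p1[row] = s[0];
        p0[row] = s[1];
        q0[row] = s[2];
        q1[row] = s[3];
    }
}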
D | loopfilter_neon.asm | 31 add r12, r2, r1 37 vld1.u8 {q4}, [r12@128], r1 ; p2 39 vld1.u8 {q6}, [r12@128], r1 ; p0 41 vld1.u8 {q8}, [r12@128], r1 ; q1 43 vld1.u8 {q10}, [r12@128] ; q3 46 sub r12, r12, r1, lsl #1 51 vst1.u8 {q6}, [r12@128], r1 ; store op0 53 vst1.u8 {q8}, [r12@128], r1 ; store oq1 69 ldr r12, [sp, #4] ; load thresh 71 vdup.u8 q2, r12 ; duplicate thresh [all …]
|
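In both loopfilter_neon.asm and mbloopfilter_neon.asm above, r12 walks the rows around a horizontal edge: vld1 loads p3..p0 and q0..q3 one pitch (r1) apart, the pointer is then rewound (sub r12, r12, r1, lsl #...) and the filtered rows are stored back, op1..oq1 for the inner filter and op2..oq2 for the macroblock filter. The row addressing in plain C, with the filter itself elided; the function name and the fixed 16-byte row buffers are illustrative:

#include <stdint.h>
#include <string.h>

/* Access pattern of the NEON horizontal-edge loop filters above.
 * src points at the first row below the edge (q0); width <= 16. */
static void walk_horizontal_edge(uint8_t *src, int pitch, int width)
{
    uint8_t p[4][16], q[4][16];     /* p[3]=p3 .. p[0]=p0, q[0]=q0 .. q[3]=q3 */
    int i;

    for (i = 0; i < 4; i++) {
        memcpy(p[3 - i], src - (4 - i) * pitch, width);  /* p3, p2, p1, p0 */
        memcpy(q[i],     src + i * pitch,       width);  /* q0, q1, q2, q3 */
    }

    /* ... the filter would modify p2..p0 and q0..q2 here ... */

    for (i = 0; i < 3; i++) {
        memcpy(src - (i + 1) * pitch, p[i], width);      /* op0, op1, op2 */
        memcpy(src + i * pitch,       q[i], width);      /* oq0, oq1, oq2 */
    }
}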
D | idct_dequant_0_2x_neon.asm | 26 add r12, r2, #4 28 vld1.32 {d8[0]}, [r12], r3 30 vld1.32 {d8[1]}, [r12], r3 32 vld1.32 {d10[0]}, [r12], r3 34 vld1.32 {d10[1]}, [r12], r3 36 ldrh r12, [r0] ; lo q 42 sxth r12, r12 ; lo 43 mul r0, r12, r1
|
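idct_dequant_0_2x_neon.asm handles the case where only the DC coefficient of a 4x4 block is nonzero, for two blocks at once. The ldrh/sxth/mul sequence above is dc = qcoeff[0] * dq[0], and the rest of the file adds ((dc + 4) >> 3) to every predicted pixel. The scalar equivalent for one block; the rounding and clamp follow VP8's DC-only inverse transform, function names are illustrative:

#include <stdint.h>

static uint8_t clamp255(int v) { return v < 0 ? 0 : (v > 255 ? 255 : (uint8_t)v); }

/* DC-only dequant + IDCT + add for one 4x4 block. */
static void dc_only_idct_add(int16_t qcoeff0, int16_t dq0,
                             const uint8_t *pred, int pred_stride,
                             uint8_t *dst, int dst_stride)
{
    int dc = qcoeff0 * dq0;          /* dequantized DC */
    int a1 = (dc + 4) >> 3;          /* constant added to every pixel */
    int r, c;

    for (r = 0; r < 4; r++)
        for (c = 0; c < 4; c++)
            dst[r * dst_stride + c] = clamp255(pred[r * pred_stride + c] + a1);
}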
D | variance_neon.asm | 33 mov r12, #8 53 subs r12, r12, #1 70 ldr r12, [sp] ;load *sse from stack 79 ;str r1, [r12] 85 vst1.32 {d1[0]}, [r12] ;store sse 106 mov r12, #4 123 subs r12, r12, #1 140 ldr r12, [sp] ;load *sse from stack 147 vst1.32 {d1[0]}, [r12] ;store sse 169 mov r12, #8 [all …]
|
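In variance_neon.asm, r12 is first the row-pair loop counter (8 iterations for a 16x16 block, fewer for the smaller sizes) and later the *sse output pointer loaded from the stack. The kernels accumulate the sum and the sum of squared differences, store the latter through *sse, and return sse minus the squared sum scaled by the pixel count. A scalar sketch with illustrative names; shift is log2 of the pixel count (8 for 16x16, 6 for 8x8):

#include <stdint.h>

static unsigned int block_variance(const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride,
                                   int w, int h, int shift, unsigned int *sse)
{
    int sum = 0;
    unsigned int sq = 0;
    int r, c;

    for (r = 0; r < h; r++)
        for (c = 0; c < w; c++) {
            int d = src[r * src_stride + c] - ref[r * ref_stride + c];
            sum += d;
            sq += (unsigned int)(d * d);
        }

    *sse = sq;                                   /* stored via [r12] above */
    return sq - (unsigned int)(((int64_t)sum * sum) >> shift);
}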
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/ |
D | loopfilter_v6.asm | 88 ldr r12, [src], pstep ; p0 99 uqsub8 r6, r11, r12 ; p1 - p0 101 uqsub8 r7, r12, r11 ; p0 - p1 111 uqsub8 r11, r12, r9 ; p0 - q0 112 uqsub8 r12, r9, r12 ; q0 - p0 115 orr r12, r11, r12 ; abs (p0-q0) 117 uqadd8 r12, r12, r12 ; abs (p0-q0) * 2 120 uqadd8 r12, r12, r6 ; abs (p0-q0)*2 + abs (p1-q1)/2 122 uqsub8 r12, r12, r4 ; compare to flimit 125 orr lr, lr, r12 [all …]
|
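loopfilter_v6.asm does the edge test with ARMv6 packed arithmetic: uqsub8 saturates to zero, so the pair uqsub8(a,b) | uqsub8(b,a) is a four-lane abs(a-b), and the uqadd8/uqsub8 chain above builds abs(p0-q0)*2 + abs(p1-q1)/2 and compares it against the packed flimit. Per pixel the test is simply the following; this sketch covers only the flimit term shown in the hits, not the full VP8 filter mask:

#include <stdint.h>
#include <stdlib.h>

/* Scalar form of the packed threshold test above. */
static int edge_needs_filtering(uint8_t p1, uint8_t p0,
                                uint8_t q0, uint8_t q1, uint8_t flimit)
{
    int test = abs(p0 - q0) * 2 + abs(p1 - q1) / 2;
    return test <= flimit;
}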
D | dequant_idct_v6.asm | 29 mov r12, #4 44 subs r12, r12, #1 63 ldr r12, [r0, #24] 70 smulwt r11, r3, r12 73 smulwt r7, r4, r12 74 smulwb r9, r3, r12 75 smulwb r10, r4, r12 80 uadd16 r7, r12, r9 104 smulwt r12, r4, r6 109 pkhbt r12, r10, r12, lsl #16 [all …]
|
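dequant_idct_v6.asm first multiplies the quantized coefficients by the dequant table and then runs VP8's 4x4 inverse DCT; the smulwt/smulwb comments above are its two fixed-point rotation constants, sin(pi/8)*sqrt(2) and cos(pi/8)*sqrt(2), kept as Q16 fractions (the latter is stored minus 1, and the uadd16 adds the original value back). idct_v6.asm further down uses the same constants. In C, with illustrative helper names:

static const int sinpi8sqrt2       = 35468;  /* sin(pi/8)*sqrt(2) * 2^16 */
static const int cospi8sqrt2minus1 = 20091;  /* (cos(pi/8)*sqrt(2) - 1) * 2^16 */

/* Q16 fixed-point multiplies used by the VP8 IDCT rotations. */
static int mul_sin(int x) { return (x * sinpi8sqrt2) >> 16; }
static int mul_cos(int x) { return x + ((x * cospi8sqrt2minus1) >> 16); }

/* One rotation of the column pass, as in VP8's C reference:
 *   c1 = mul_sin(ip[4]) - mul_cos(ip[12]);
 *   d1 = mul_cos(ip[4]) + mul_sin(ip[12]);                              */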
D | intra4x4_predict_v6.asm | 34 push {r4-r12, lr} 38 pop {r4-r12, pc} ; default 57 usad8 r12, r8, r9 64 add r4, r4, r12 67 mov r12, r4, asr #3 ; (expected_dc + 4) >> 3 69 add r12, r12, r12, lsl #8 71 add r12, r12, r12, lsl #16 74 str r12, [r3], r0 75 str r12, [r3], r0 76 str r12, [r3], r0 [all …]
|
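intra4x4_predict_v6.asm computes the 4x4 DC mode as (sum of the four above and four left pixels + 4) >> 3 (the usad8 above does the byte summing), then the two add-with-shift instructions replicate the DC byte into every byte of a word so a single str fills a 4-pixel row. A C sketch with illustrative names:

#include <stdint.h>
#include <string.h>

static void dc_predict_4x4(uint8_t *dst, int stride,
                           const uint8_t *above, const uint8_t *left)
{
    int sum = 0, i;
    uint32_t row;

    for (i = 0; i < 4; i++) sum += above[i] + left[i];

    row = (uint32_t)((sum + 4) >> 3);   /* expected_dc */
    row += row << 8;                    /* dc | dc */
    row += row << 16;                   /* dc | dc | dc | dc */

    for (i = 0; i < 4; i++)
        memcpy(&dst[i * stride], &row, 4);   /* one word per row, as str does */
}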
D | iwalsh_v6.asm | 22 stmdb sp!, {r4 - r12, lr} 35 qsub16 r12, r4, r6 ; c1 [5-9 | 4-8] 39 qadd16 r4, r12, lr ; c1 + d1 [5 | 4] 41 qsub16 r8, lr, r12 ; d1 - c1 [13 | 12] 45 qsub16 r12, r5, r7 ; c1 [7-11 | 6-10] 49 qadd16 r5, r12, lr ; c1 + d1 [7 | 6] 51 qsub16 r9, lr, r12 ; d1 - c1 [15 | 14] 57 qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7] 63 qaddsubx r4, r12, lr ; [b2|c2] [c1+d1 | a1-b1] 64 qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1] [all …]
|
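iwalsh_v6.asm is VP8's 4x4 inverse Walsh-Hadamard transform; the qadd16/qsub16 butterflies work on two 16-bit coefficients packed per register, which is why each comment names a pair of column positions ("c1 [5-9 | 4-8]" is ip[5]-ip[9] in the top half and ip[4]-ip[8] in the bottom half). One column pass in scalar C; the row pass repeats the same butterflies horizontally and finishes with (x + 3) >> 3:

#include <stdint.h>

static void iwalsh_columns(const int16_t *ip, int16_t *op)
{
    int i;
    for (i = 0; i < 4; i++) {
        int a1 = ip[i + 0] + ip[i + 12];
        int b1 = ip[i + 4] + ip[i + 8];
        int c1 = ip[i + 4] - ip[i + 8];
        int d1 = ip[i + 0] - ip[i + 12];

        op[i + 0]  = (int16_t)(a1 + b1);
        op[i + 4]  = (int16_t)(c1 + d1);
        op[i + 8]  = (int16_t)(a1 - b1);
        op[i + 12] = (int16_t)(d1 - c1);
    }
}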
D | copymem8x8_v6.asm | 39 mov r12, #8 48 subs r12, r12, #1 83 mov r12, #8 86 subs r12, r12, #1 108 mov r12, #8 115 subs r12, r12, #1
|
D | copymem8x4_v6.asm | 39 mov r12, #4 48 subs r12, r12, #1 83 mov r12, #4 86 subs r12, r12, #1 108 mov r12, #4 115 subs r12, r12, #1
|
D | copymem16x16_v6.asm | 42 mov r12, #16 55 subs r12, r12, #1 106 mov r12, #16 109 subs r12, r12, #1 137 mov r12, #16 147 subs r12, r12, #1 165 mov r12, #16 172 subs r12, r12, #1
|
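In the three copymem files above, r12 is only the remaining-row counter: it is loaded with the block height (4, 8 or 16), decremented by subs after each stored row, and the loop repeats until it reaches zero. The C shape of that loop, with illustrative names:

#include <string.h>

static void copy_block(const unsigned char *src, int src_stride,
                       unsigned char *dst, int dst_stride,
                       int width, int height)
{
    int rows = height;               /* the r12 counter */
    while (rows--) {
        memcpy(dst, src, width);
        src += src_stride;
        dst += dst_stride;
    }
}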
D | idct_v6.asm | 39 ldr r12, [r0, #(12*2)] ; i13|i12 47 smulbt r11, r5, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 52 smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 53 smulbb r9, r5, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 54 smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 61 uadd16 r7, r12, r9 ; 13c+13 | 12c+12 87 ldr r12,[r0, #(2*2)] ; i3 | i2 102 pkhbt r9, r14, r12, lsl #16 ; i2 | i6 105 pkhtb r6, r12, r14, asr #16 ; i3 | i7 111 smulbb r12, r5, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 [all …]
|
D | sixtappredict8x4_v6.asm | 35 adr r12, filter8_coeff 41 add r2, r12, r2, lsl #4 ;calculate filter location 69 smuad r12, r7, r3 75 smlad r12, r9, r4, r12 80 smlad r12, r6, r5, r12 87 add r12, r12, #0x40 89 usat r12, #8, r12, asr #7 91 strh r12, [lr], #20 94 movne r12, r7 100 movne r10, r12 [all …]
|
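sixtappredict8x4_v6.asm evaluates VP8's six-tap sub-pixel filter with smuad/smlad dual multiplies; the add r12, r12, #0x40 and usat r12, #8, r12, asr #7 pair at the end is the filter's rounding: add 64, shift right by 7 (the taps sum to 128) and saturate to 8 bits. One output pixel in C, with illustrative names:

#include <stdint.h>

/* One output pixel of the 6-tap filter; the taps cover src[-2]..src[3]. */
static uint8_t sixtap_pixel(const uint8_t *src, const int16_t filter[6])
{
    int sum = 0, k;

    for (k = 0; k < 6; k++)
        sum += src[k - 2] * filter[k];

    sum = (sum + 64) >> 7;                  /* the add #0x40 / asr #7 above */
    if (sum < 0) sum = 0;
    if (sum > 255) sum = 255;               /* the usat #8 above */
    return (uint8_t)sum;
}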
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
D | vp9_idct16x16_1_add_neon.asm | 29 mov r12, #0x2d00 30 add r12, #0x41 33 mul r0, r0, r12 ; input[0] * cospi_16_64 38 mul r0, r0, r12 ; out * cospi_16_64 39 mov r12, r1 ; save dest 69 vst1.64 {d2}, [r12], r0 70 vst1.64 {d3}, [r12], r2 71 vst1.64 {d30}, [r12], r0 72 vst1.64 {d31}, [r12], r2 82 vst1.64 {d2}, [r12], r0 [all …]
|
D | vp9_idct8x8_1_add_neon.asm | 29 mov r12, #0x2d00 30 add r12, #0x41 33 mul r0, r0, r12 ; input[0] * cospi_16_64 38 mul r0, r0, r12 ; out * cospi_16_64 39 mov r12, r1 ; save dest 67 vst1.64 {d2}, [r12], r2 68 vst1.64 {d3}, [r12], r2 69 vst1.64 {d30}, [r12], r2 70 vst1.64 {d31}, [r12], r2 80 vst1.64 {d2}, [r12], r2 [all …]
|
D | vp9_idct4x4_1_add_neon.asm | 29 mov r12, #0x2d00 30 add r12, #0x41 33 mul r0, r0, r12 ; input[0] * cospi_16_64 38 mul r0, r0, r12 ; out * cospi_16_64 39 mov r12, r1 ; save dest 60 vst1.32 {d6[0]}, [r12], r2 61 vst1.32 {d6[1]}, [r12], r2 62 vst1.32 {d7[0]}, [r12], r2 63 vst1.32 {d7[1]}, [r12]
|
D | vp9_dc_only_idct_add_neon.asm | 30 mov r12, #0x2d00 31 add r12, #0x41 34 mul r0, r0, r12 ; input_dc * cospi_16_64 39 mul r0, r0, r12 ; out * cospi_16_64 48 ldr r12, [sp] ; load stride 61 vst1.32 {d2[0]}, [r2], r12 62 vst1.32 {d2[1]}, [r2], r12 63 vst1.32 {d4[0]}, [r2], r12
|
D | vp9_idct32x32_1_add_neon.asm | 81 mov r12, #0x2d00 82 add r12, #0x41 85 mul r0, r0, r12 ; input[0] * cospi_16_64 90 mul r0, r0, r12 ; out * cospi_16_64 91 mov r12, r1 ; save dest 110 ST_16x8 r12, r2 114 ST_16x8 r12, r2 117 moveq r12, r3 131 ST_16x8 r12, r2 135 ST_16x8 r12, r2 [all …]
|
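The five vp9 files above share the same scalar prologue: mov r12, #0x2d00 / add r12, #0x41 builds cospi_16_64 = 0x2d41 = 11585, the two mul plus round/shift steps compute the DC output of the 2-D inverse DCT, and the result, rounded by a block-size-dependent shift, is added to every destination pixel by the vst1 loops. The arithmetic in C, following VP9's Q14 rounding; the shift is 4 for the 4x4 kernels, 5 for 8x8 and 6 for 16x16 and 32x32, and the function names are illustrative:

#include <stdint.h>

#define cospi_16_64 11585                       /* 0x2d41 */

static int dct_const_round_shift(int input)     /* round from Q14 to Q0 */
{
    return (input + (1 << 13)) >> 14;
}

static int dc_only_value(int16_t input0, int shift)
{
    int out = dct_const_round_shift(input0 * cospi_16_64);  /* 1-D DC */
    out = dct_const_round_shift(out * cospi_16_64);         /* 2-D DC */
    return (out + (1 << (shift - 1))) >> shift;             /* value added to dest */
}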
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
D | subtract_neon.asm | 68 mov r12, #4 101 subs r12, r12, #1 119 mov r12, #32 ; "diff" stride x2 149 vst1.16 {q8}, [r0], r12 ;store diff 150 vst1.16 {q9}, [r7], r12 151 vst1.16 {q10}, [r0], r12 152 vst1.16 {q11}, [r7], r12 153 vst1.16 {q12}, [r0], r12 154 vst1.16 {q13}, [r7], r12 155 vst1.16 {q14}, [r0], r12 [all …]
|
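subtract_neon.asm builds the encoder's residual, diff = src - pred, as signed 16-bit values; r12 is either the row-loop counter (#4) or the byte stride of the diff buffer (#32, i.e. 16 coefficients of 2 bytes) used to advance the two alternating output pointers. The same computation in plain C, with illustrative names:

#include <stdint.h>

static void subtract_block(int16_t *diff, int diff_stride,
                           const uint8_t *src, int src_stride,
                           const uint8_t *pred, int pred_stride,
                           int w, int h)
{
    int r, c;
    for (r = 0; r < h; r++)
        for (c = 0; c < w; c++)
            diff[r * diff_stride + c] =
                (int16_t)(src[r * src_stride + c] - pred[r * pred_stride + c]);
}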
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv6/ |
D | vp8_short_fdct4x4_armv6.asm | 21 stmfd sp!, {r4 - r12, lr} 30 ldr r12, c0x22a453a0 ; [2217*4 | 5352*4] 46 smlad r6, r7, r12, r11 ; o1 = (c1 * 2217 + d1 * 5352 + 14500) 47 smlsdx r7, r7, r12, r10 ; o3 = (d1 * 2217 - c1 * 5352 + 7500) 71 smlad r6, r7, r12, r11 ; o5 = (c1 * 2217 + d1 * 5352 + 14500) 72 smlsdx r7, r7, r12, r10 ; o7 = (d1 * 2217 - c1 * 5352 + 7500) 96 smlad r6, r7, r12, r11 ; o9 = (c1 * 2217 + d1 * 5352 + 14500) 97 smlsdx r7, r7, r12, r10 ; o11 = (d1 * 2217 - c1 * 5352 + 7500) 119 smlad r6, r7, r12, r11 ; o13 = (c1 * 2217 + d1 * 5352 + 14500) 120 smlsdx r7, r7, r12, r10 ; o15 = (d1 * 2217 - c1 * 5352 + 7500) [all …]
|
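In vp8_short_fdct4x4_armv6.asm the literal c0x22a453a0 packs 2217*4 and 5352*4, the fixed-point multipliers of VP8's forward 4x4 DCT, and the smlad/smlsdx comments spell out the odd-output butterflies of the first pass. That row pass in C, matching the o1/o3 comments above; the second, column pass uses different rounding constants (12000 and 51000 with a 16-bit shift):

#include <stdint.h>

/* First (row) pass of VP8's 4x4 forward DCT.  pitch is in bytes. */
static void fdct4x4_rows(const int16_t *input, int16_t *output, int pitch)
{
    int i;
    for (i = 0; i < 4; i++) {
        const int16_t *ip = input + i * (pitch / 2);
        int16_t *op = output + i * 4;

        int a1 = (ip[0] + ip[3]) * 8;
        int b1 = (ip[1] + ip[2]) * 8;
        int c1 = (ip[1] - ip[2]) * 8;
        int d1 = (ip[0] - ip[3]) * 8;

        op[0] = (int16_t)(a1 + b1);
        op[2] = (int16_t)(a1 - b1);
        op[1] = (int16_t)((c1 * 2217 + d1 * 5352 + 14500) >> 12);
        op[3] = (int16_t)((d1 * 2217 - c1 * 5352 + 7500) >> 12);
    }
}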
D | vp8_fast_quantize_b_armv6.asm | 52 ldr r12, [r3], #4 ; [z3 | z2] 59 ssat16 r11, #1, r12 ; [sz3 | sz2] 60 eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2] 63 ssub16 r12, r12, r11 ; x = (z ^ sz) - sz 65 sadd16 r12, r12, r10 ; [x3+r3 | x2+r2] 69 smulbb r10, r12, r9 ; [(x2+r2)*q2] 70 smultt r12, r12, r9 ; [(x3+r3)*q3] 80 pkhtb r10, r12, r10, asr #16 ; [y3 | y2] 90 smulbb r12, r0, r9 ; [x0*dq0] 97 strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0] [all …]
|
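vp8_fast_quantize_b_armv6.asm quantizes two coefficients per iteration with packed halfword arithmetic; the ssat16/eor/ssub16 sequence is the branch-free absolute value x = (z ^ sz) - sz, where sz is a per-halfword sign mask, and the sign is restored the same way after the multiply. Per coefficient in C; names are illustrative and the real routine also walks the zig-zag order and tracks the end-of-block position:

#include <stdint.h>

static void fast_quantize_coeff(int16_t z, int16_t round, int16_t quant,
                                int16_t dequant,
                                int16_t *qcoeff, int16_t *dqcoeff)
{
    int sz = z >> 15;                       /* 0 or -1: sign mask */
    int x = (z ^ sz) - sz;                  /* |z| */
    int y = ((x + round) * quant) >> 16;    /* quantized magnitude */

    x = (y ^ sz) - sz;                      /* restore the sign */
    *qcoeff = (int16_t)x;
    *dqcoeff = (int16_t)(x * dequant);      /* the smulbb dequant step above */
}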
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv5te/ |
D | vp8_packtokens_armv5.asm | 29 push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call 35 pop {r0-r3, r12, lr} 46 push {r4-r12, lr} 79 ; v is kept in r12 during the token pack loop 80 lsl r12, r6, r4 ; r12 = v << 32 - n 90 lsls r12, r12, #1 ; bb = v >> n 168 add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t 170 ldr r4, [r12, #vp8_extra_bit_struct_base_val] 175 ldr r8, [r12, #vp8_extra_bit_struct_len] ; L 180 ldr r9, [r12, #vp8_extra_bit_struct_prob] [all …]
|