/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
D | vp8_mse16x16_neon.asm | 30 vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse 48 vmlal.s16 q7, d22, d22 55 vmlal.s16 q7, d26, d26 62 vadd.u32 q7, q7, q8 67 vadd.u32 q10, q7, q9 99 vmull.s16 q7, d22, d22 104 vadd.u32 q7, q7, q8 106 vadd.u32 q9, q7, q9
|
D | fastquantizeb_neon.asm | 27 vstmdb sp!, {q4-q7} 44 vld1.s16 {q6, q7}, [r6@128] ; load round_ptr [0-15] 50 vadd.s16 q5, q7 67 vadd.s16 q11, q7 77 vld1.s16 {q6, q7}, [r8@128] ;load dequant_ptr[i] 99 vmul.s16 q3, q7, q5 108 vmul.s16 q13, q7, q11 110 vld1.16 {q6, q7}, [r0@128] ; load inverse scan order 120 vand q1, q7, q15 130 vand q11, q7, q3 [all …]
|
D | vp8_memcpy_neon.asm | 38 vld1.8 {q6, q7}, [r1]! 41 vst1.8 {q6, q7}, [r0]!
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/neon/ |
D | idct_dequant_full_2x_neon.asm | 58 ; q7: 12 * sinpi : d1/temp2 62 vqdmulh.s16 q7, q5, d0[2] 88 vqadd.s16 q3, q4, q7 97 vqsub.s16 q7, q10, q3 101 vtrn.32 q5, q7 103 vtrn.16 q6, q7 108 ; q7: l 3, 7,11,15 r 3, 7,11,15 115 vqdmulh.s16 q9, q7, d0[2] 117 vqdmulh.s16 q11, q7, d0[0] 129 vqadd.s16 q11, q7, q11 [all …]
|
D | sixtappredict4x4_neon.asm | 85 vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5]) 95 vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0]) 102 vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1]) 109 vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4]) 116 vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2]) 127 vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters) 133 vqrshrun.s16 d27, q7, #7 ;shift/round/saturate to u8 150 vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5]) 162 vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0]) 172 vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1]) [all …]
|
D | sixtappredict8x4_neon.asm | 79 vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 89 vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1]) 99 vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4]) 109 vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2]) 119 vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5]) 134 vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) 141 vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8 152 vld1.u8 {q7}, [r0], r1 220 vmull.u8 q7, d31, d3 226 vqadd.s16 q12, q7 [all …]
|
D | mbloopfilter_neon.asm | 41 vld1.u8 {q7}, [r0@128], r1 ; q0 54 vst1.u8 {q7}, [r0@128],r1 ; store oq0 156 vtrn.32 q3, q7 163 vtrn.16 q7, q9 168 vtrn.8 q7, q8 178 vtrn.32 q3, q7 185 vtrn.16 q7, q9 190 vtrn.8 q7, q8 251 vtrn.32 q3, q7 258 vtrn.16 q7, q9 [all …]
|
D | sixtappredict8x8_neon.asm | 83 vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 93 vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1]) 103 vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4]) 113 vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2]) 123 vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5]) 140 vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) 147 vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8 167 vld1.u8 {q7}, [r0], r1 233 vmull.u8 q7, d31, d3 239 vqadd.s16 q12, q7 [all …]
|
D | vp8_subpixelvariance8x8_neon.asm | 50 vmull.u8 q7, d4, d0 60 vmlal.u8 q7, d5, d1 67 vqrshrn.u16 d23, q7, #7 77 vmull.u8 q7, d4, d0 89 vmlal.u8 q7, d5, d1 95 vqrshrn.u16 d27, q7, #7 119 vmull.u8 q7, d28, d0 128 vmlal.u8 q7, d29, d1 137 vqrshrn.u16 d28, q7, #7 180 vsubl.u8 q7, d25, d3 [all …]
|
D | loopfiltersimplehorizontaledge_neon.asm | 28 vld1.u8 {q7}, [r0@128], r1 ; q0 33 vabd.u8 q15, q6, q7 ; abs(p0 - q0) 42 veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value 77 vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1) 80 veor q7, q10, q0 ; *oq0 = u^0x80 83 vst1.u8 {q7}, [r0@128] ; store oq0
|
D | loopfilter_neon.asm | 40 vld1.u8 {q7}, [r2@128], r1 ; q0 52 vst1.u8 {q7}, [r2@128], r1 ; store oq0 149 vtrn.32 q3, q7 158 vtrn.16 q7, q9 163 vtrn.8 q7, q8 239 vtrn.32 q3, q7 248 vtrn.16 q7, q9 253 vtrn.8 q7, q8 299 ; q7 q0 309 vabd.u8 q14, q8, q7 ; abs(q1 - q0) [all …]
|
D | vp8_subpixelvariance16x16s_neon.asm | 58 vext.8 q7, q6, q7, #1 64 vrhadd.u8 q3, q6, q7 69 vsubl.u8 q7, d3, d25 87 vpadal.s16 q8, q7 154 vld1.8 {q7}, [r2], r3 256 vext.8 q7, q6, q7, #1 261 vrhadd.u8 q3, q6, q7 268 vld1.8 {q7}, [r2], r3 383 vext.8 q7, q6, q7, #1 388 vrhadd.u8 q3, q6, q7 [all …]
|
D | sad16_neon.asm | 43 vld1.8 {q7}, [r2], r3 68 vld1.8 {q7}, [r2], r3 93 vld1.8 {q7}, [r2], r3 118 vld1.8 {q7}, [r2] 162 vld1.8 {q7}, [r2], r3 186 vld1.8 {q7}, [r2], r3
|
D | sixtappredict16x16_neon.asm | 177 vmull.u8 q7, d31, d3 183 vqadd.s16 q11, q7 206 vabs.s32 q7, q5 257 vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3]) 264 vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) 269 vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8 320 vmull.u8 q7, d7, d0 337 vmlsl.u8 q7, d22, d1 ;-(src_ptr[-1] * vp8_filter[1]) 341 vmlsl.u8 q7, d26, d4 ;-(src_ptr[2] * vp8_filter[4]) 358 vmlal.u8 q7, d20, d5 ;(src_ptr[3] * vp8_filter[5]) [all …]
|
D | vp8_subpixelvariance16x16_neon.asm | 70 vmull.u8 q7, d2, d0 ;(src_ptr[0] * Filter[0]) 84 vmlal.u8 q7, d2, d1 ;(src_ptr[0] * Filter[1]) 101 vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8 206 vmull.u8 q7, d28, d0 215 vmlal.u8 q7, d30, d1 226 vqrshrn.u16 d8, q7, #7 258 vmull.u8 q7, d2, d0 ;(src_ptr[0] * Filter[0]) 272 vmlal.u8 q7, d2, d1 ;(src_ptr[0] * Filter[1]) 289 vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8 333 vmull.u8 q7, d28, d0 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
D | vp9_idct32x32_add_neon.asm | 112 vrshr.s16 q7, q7, #6 117 vaddw.u8 q7, q7, d9 122 vqmovun.s16 d9, q7 146 vrshr.s16 q7, q7, #6 151 vaddw.u8 q7, q7, d9 156 vqmovun.s16 d9, q7 171 ; q4-q7 contain the results (out[j * 32 + 0-31]) 182 vrshr.s16 q7, q7, #6 187 vaddw.u8 q7, q7, d7 192 vqmovun.s16 d7, q7 [all …]
|
D | vp9_loopfilter_16_neon.asm | 61 vld1.u8 {q7}, [r2@64], r1 ; q0 73 vst1.u8 {q7}, [r2@64], r1 ; store oq0 94 ; q7 q0 102 ; q7 oq0 110 vabd.u8 q14, q8, q7 ; m4 = abs(q1 - q0) 118 vabd.u8 q9, q6, q7 ; abs(p0 - q0) 133 veor q7, q7, q10 ; qs0 180 vqsub.s8 q0, q7, q1 ; u = clamp(qs0 - filter1) 185 veor q7, q0, q10 ; *oq0 = u^0x80
|
D | vp9_iht8x8_add_neon.asm | 120 ; will be stored back into q8-q15 registers. This macro will touch q0-q7 243 vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7] 244 vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7] 271 vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7]; 278 vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7]; 284 ; q0 - q7 registers and use them as buffer during calculation. 314 vmull.s16 q7, d22, d31 322 vmlsl.s16 q7, d24, d30 341 vadd.s32 q12, q3, q7 345 vsub.s32 q3, q3, q7 [all …]
|
D | vp9_idct8x8_add_neon.asm | 21 ; This macro will touch q0-q7 registers and use them as buffer during 145 vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7] 146 vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7] 173 vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7]; 180 vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7]; 377 vqrdmulh.s16 q7, q9, q1 418 vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7] 419 vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7] 446 vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7]; 453 vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
|
D | vp9_idct16x16_add_neon.asm | 47 ; will be stored back into q8-q15 registers. This function will touch q0-q7 208 vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]; 209 vadd.s16 q15, q6, q7 ; step2[7] = step1[6] + step1[7]; 293 ; will be stored back into q8-q15 registers. This function will touch q0-q7 465 vsub.s16 q14, q7, q6 ; step1[14]=-step2[14]+tep2[15] 466 vadd.s16 q7, q6, q7 ; step1[15]=step2[14]+step2[15] 540 vsub.s16 q12, q7, q4 ; step1[12] =-step2[12]+step2[15]; 543 vadd.s16 q15, q7, q4 ; step1[15] =step2[12]+step2[15]; 797 ; will be stored back into q8-q15 registers. This function will touch q0-q7 841 vqrdmulh.s16 q7, q9, q1 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/mips/dspr2/ |
D | vp9_loopfilter_macros_dspr2.h | 383 [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6), [q7] "r" (q7) \ 423 [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6), [q7] "r" (q7) \
|
D | vp9_mblpf_vert_loopfilter_dspr2.c | 34 uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; in vp9_lpf_vertical_16_dspr2() local 94 [q0] "=&r" (q0), [q7] "=&r" (q7), [q6] "=&r" (q6), in vp9_lpf_vertical_16_dspr2() 246 [q7] "+r" (q7), [q6] "+r" (q6), [q5] "+r" (q5), [q4] "+r" (q4), in vp9_lpf_vertical_16_dspr2() 255 vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2); in vp9_lpf_vertical_16_dspr2()
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/ |
D | vp9_loopfilter_filters.c | 246 q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7; in filter16() local 264 q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); in filter16() 266 q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); in filter16() 268 q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); in filter16() 270 q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); in filter16() 272 q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); in filter16() 274 q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); in filter16() 276 q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); in filter16()
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_copysrcframe_func_neon.asm | 56 vld1.8 {q6, q7}, [r10]! 66 vst1.8 {q6, q7}, [r11]! 170 vld1.8 {q6, q7}, [r10]! 176 vst1.8 {q6, q7}, [r11]!
|
D | vp8_vpxyv12_extendframeborders_neon.asm | 54 vmov q7, q6 65 vst1.8 {q6, q7}, [r6], lr 93 vld1.8 {q6, q7}, [r1]! 107 vst1.8 {q6, q7}, [r5]! 174 vst1.8 {q7}, [r6], lr
|