/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/neon/ |
D | dequant_idct_neon.c | 24 int16x8_t q1, q2, q3, q4, q5, q6; in vp8_dequant_idct_add_neon() local 32 q3 = vld1q_s16(input); in vp8_dequant_idct_add_neon() 53 q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3), in vp8_dequant_idct_add_neon() 63 q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); in vp8_dequant_idct_add_neon() 66 q3 = vshrq_n_s16(q3, 1); in vp8_dequant_idct_add_neon() 69 q3 = vqaddq_s16(q3, q2); in vp8_dequant_idct_add_neon() 72 d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); in vp8_dequant_idct_add_neon() 73 d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); in vp8_dequant_idct_add_neon() 90 q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); in vp8_dequant_idct_add_neon() 96 q3 = vshrq_n_s16(q3, 1); in vp8_dequant_idct_add_neon() [all …]
|
D | buildintrapredictorsmby_neon.asm | 61 vpaddl.u16 q3, q2 62 vpaddl.u32 q4, q3 181 vdup.u8 q3, r6 185 vst1.u8 {q3}, [r1]! 194 vdup.u8 q3, r6 198 vst1.u8 {q3}, [r1]! 208 vdup.u8 q3, r6 212 vst1.u8 {q3}, [r1]! 221 vdup.u8 q3, r6 225 vst1.u8 {q3}, [r1]! [all …]
|
D | vp8_subpixelvariance16x16s_neon.asm | 56 vext.8 q3, q2, q3, #1 62 vrhadd.u8 q1, q2, q3 64 vrhadd.u8 q3, q6, q7 73 vsubl.u8 q3, d7, d29 100 vpadal.s16 q8, q3 148 vld1.8 {q3}, [r2], r3 166 vsubl.u8 q3, d13, d15 196 vpadal.s16 q8, q3 254 vext.8 q3, q2, q3, #1 ;construct src_ptr[1] 259 vrhadd.u8 q1, q2, q3 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 [all …]
|
D | idct_dequant_full_2x_neon.asm | 26 vld1.16 {q2, q3}, [r0] ; l q 45 vmul.i16 q3, q3, q1 51 ; q2: l0r0 q3: l8r8 66 vqadd.s16 q10, q2, q3 ; a1 = 0 + 8 67 vqsub.s16 q11, q2, q3 ; b1 = 0 - 8 88 vqadd.s16 q3, q4, q7 94 vqadd.s16 q4, q10, q3 97 vqsub.s16 q7, q10, q3 120 vqsub.s16 q3, q4, q6 ; b1 = 0 - 2 141 vqadd.s16 q5, q3, q8 [all …]
|
D | loopfiltersimplehorizontaledge_neon.asm | 50 vsubl.s8 q3, d15, d13 55 vmul.s16 q3, q3, q13 61 vaddw.s8 q3, q3, d9 64 vqmovn.s16 d9, q3 69 vqadd.s8 q3, q14, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4) 71 vshr.s8 q4, q3, #3 ; Filter1 >>= 3
|
D | sixtappredict4x4_neon.asm | 60 vld1.u8 {q3}, [r0], r1 ;load first 4-line src data 88 vmov q4, q3 ;keep original src data in q4 q6 100 vshr.u64 q3, q4, #32 ;construct src_ptr[2] 114 vshr.u64 q3, q4, #24 ;construct src_ptr[1] 124 vld1.u8 {q3}, [r0], r1 ;load rest 5-line src data 154 vmov q4, q3 ;keep original src data in q4 q6 168 vshr.u64 q3, q4, #32 ;construct src_ptr[2] 188 vshr.u64 q3, q4, #24 ;construct src_ptr[1] 231 vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp8_filter[0]) 237 vmlsl.u8 q3, d29, d4 ;-(src_ptr[2] * vp8_filter[4]) [all …]
|
D | loopfilter_neon.asm | 36 vld1.u8 {q3}, [r2@128], r1 ; p3 43 vld1.u8 {q10}, [r12@128] ; q3 90 vld1.u8 {d20}, [r3@64] ; q3 91 vld1.u8 {d21}, [r12@64] ; q3 149 vtrn.32 q3, q7 156 vtrn.16 q3, q5 161 vtrn.8 q3, q4 239 vtrn.32 q3, q7 246 vtrn.16 q3, q5 251 vtrn.8 q3, q4 [all …]
|
D | mbloopfilter_neon.asm | 37 vld1.u8 {q3}, [r0@128], r1 ; p3 44 vld1.u8 {q10}, [r12@128], r1 ; q3 95 vld1.u8 {d20}, [r0@64], r1 ; q3 96 vld1.u8 {d21}, [r12@64], r1 ; q3 156 vtrn.32 q3, q7 161 vtrn.16 q3, q5 166 vtrn.8 q3, q4 178 vtrn.32 q3, q7 183 vtrn.16 q3, q5 188 vtrn.8 q3, q4 [all …]
|
D | sixtappredict8x4_neon.asm | 67 vld1.u8 {q3}, [r0], r1 ;load src data 129 vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3]) 134 vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) 139 vld1.u8 {q3}, [r0], r1 ;load src data 216 vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp8_filter[3]) 222 vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters) 254 vmull.u8 q3, d22, d0 ;(src_ptr[-2] * vp8_filter[0]) 259 vmlsl.u8 q3, d23, d1 ;-(src_ptr[-1] * vp8_filter[1]) 264 vmlsl.u8 q3, d26, d4 ;-(src_ptr[2] * vp8_filter[4]) 269 vmlal.u8 q3, d24, d2 ;(src_ptr[0] * vp8_filter[2]) [all …]
|
D | sixtappredict8x8_neon.asm | 70 vld1.u8 {q3}, [r0], r1 ;load src data 133 vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3]) 140 vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) 145 vld1.u8 {q3}, [r0], r1 ;load src data 163 ;vld1.u8 {q3}, [r0], r1 ;load src data 229 vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp8_filter[3]) 235 vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters) 272 vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0]) 277 vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1]) 282 vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4]) [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/ |
D | vp9_loopfilter_filters.c | 25 uint8_t q2, uint8_t q3) { in filter_mask() argument 32 mask |= (abs(q3 - q2) > limit) * -1; in filter_mask() 41 uint8_t q2, uint8_t q3) { in flat_mask4() argument 48 mask |= (abs(q3 - q0) > thresh) * -1; in flat_mask4() 57 uint8_t q3, uint8_t q4) { in flat_mask5() argument 58 int8_t mask = ~flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3); in flat_mask5() 114 const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; in vp9_lpf_horizontal_4_c() local 116 p3, p2, p1, p0, q0, q1, q2, q3); in vp9_lpf_horizontal_4_c() 139 const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; in vp9_lpf_vertical_4_c() local 141 p3, p2, p1, p0, q0, q1, q2, q3); in vp9_lpf_vertical_4_c() [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/mips/dspr2/ |
D | vp9_loopfilter_masks_dspr2.h | 31 uint32_t q2, uint32_t q3, in vp9_filter_hev_mask_dspr2() argument 95 [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh) in vp9_filter_hev_mask_dspr2() 138 uint32_t q2, uint32_t q3, in vp9_filter_hev_mask_flatmask4_dspr2() argument 243 [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh), in vp9_filter_hev_mask_flatmask4_dspr2() 286 uint32_t q3, uint32_t q4, in vp9_flatmask5() argument 362 [q2] "r" (q2), [q3] "r" (q3), [q4] "r" (q4), in vp9_flatmask5()
|
D | vp9_loopfilter_filters_dspr2.h | 377 const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; in vp9_mbfilter_dspr2() local 436 [p2] "r" (p2), [q2] "r" (q2), [p3] "r" (p3), [q3] "r" (q3), in vp9_mbfilter_dspr2() 451 uint32_t q2, uint32_t q3, in vp9_mbfilter1_dspr2() argument 514 [p2] "r" (p2), [q2] "r" (q2), [p3] "r" (p3), [q3] "r" (q3), in vp9_mbfilter1_dspr2() 536 const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; in vp9_wide_mbfilter_dspr2() local 565 [q0] "r" (q0), [q1] "r" (q1), [q2] "r" (q2), [q3] "r" (q3), in vp9_wide_mbfilter_dspr2() 652 [q3] "r" (q3), [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6), in vp9_wide_mbfilter_dspr2() 744 [q3] "r" (q3), [q2] "r" (q2), [q1] "r" (q1), [q0] "r" (q0), in vp9_wide_mbfilter_dspr2()
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
D | vp9_avg_neon.asm | 37 vld1.8 {q2-q3}, [r0], lr 44 vrhadd.u8 q3, q3, q11 46 vst1.8 {q2-q3}, [r2@128], r4 53 vld1.8 {q2-q3}, [r0], r1 63 vrhadd.u8 q3, q3, q11 65 vst1.8 {q2-q3}, [r2@128], r3 74 vld1.8 {q3}, [r6@128], r3 80 vrhadd.u8 q1, q1, q3
|
D | vp9_iht8x8_add_neon.asm | 132 vmull.s16 q3, d19, d0 140 vmlsl.s16 q3, d31, d1 148 vqrshrn.s32 d9, q3, #14 ; >> 14 156 vmull.s16 q3, d19, d1 164 vmlal.s16 q3, d31, d0 172 vqrshrn.s32 d15, q3, #14 ; >> 14 183 vmull.s16 q3, d17, d0 191 vmlal.s16 q3, d25, d0 202 vqrshrn.s32 d19, q3, #14 ; >> 14 210 vmull.s16 q3, d21, d0 [all …]
|
D | vp9_loopfilter_16_neon.asm | 57 vld1.u8 {q3}, [r2@64], r1 ; p3 64 vld1.u8 {q10}, [r3@64] ; q3 90 ; q3 p3 97 ; q10 q3 107 vabd.u8 q11, q3, q4 ; m1 = abs(p3 - p2) 111 vabd.u8 q3, q9, q8 ; m5 = abs(q2 - q1) 112 vabd.u8 q4, q10, q9 ; m6 = abs(q3 - q2) 120 vmax.u8 q3, q3, q4 ; m9 = max(m5, m6) 128 vmax.u8 q15, q15, q3 ; m11 = max(m10, m9)
|
D | vp9_reconintra_neon.asm | 308 vsubl.u8 q3, d2, d0 316 vadd.s16 q1, q1, q3 317 vadd.s16 q2, q2, q3 328 vadd.s16 q1, q1, q3 329 vadd.s16 q2, q2, q3 360 vsubl.u8 q3, d2, d0 366 vadd.s16 q0, q3, q0 367 vadd.s16 q1, q3, q1 372 vadd.s16 q8, q3, q8 373 vadd.s16 q9, q3, q9 [all …]
|
D | vp9_idct16x16_add_neon.asm | 89 vmull.s16 q3, d19, d0 97 vmlsl.s16 q3, d31, d1 108 vqrshrn.s32 d9, q3, #14 ; >> 14 125 vmull.s16 q3, d27, d2 133 vmlsl.s16 q3, d23, d3 141 vqrshrn.s32 d11, q3, #14 ; >> 14 166 vadd.s32 q3, q2, q0 174 vqrshrn.s32 d16, q3, #14 ; >> 14 219 vsub.s16 q3, q8, q11 ; step1[3] = step2[0] - step2[3]; 251 vadd.s16 q11, q3, q4 ; step2[3] = step1[3] + step1[4]; [all …]
|
D | vp9_idct32x32_add_neon.asm | 370 vld1.s16 {q3}, [r3]! 398 vtrn.32 q1, q3 406 vtrn.16 q2, q3 423 vst1.16 {q3}, [r0]! 483 vadd.s16 q6, q2, q3 484 vsub.s16 q14, q2, q3 516 vsub.s16 q13, q3, q2 517 vadd.s16 q3, q3, q2 540 vadd.s16 q15, q6, q3 558 vsub.s16 q14, q6, q3 [all …]
|
D | vp9_idct8x8_add_neon.asm | 33 vmull.s16 q3, d19, d0 41 vmlsl.s16 q3, d31, d1 49 vqrshrn.s32 d9, q3, #14 ; >> 14 57 vmull.s16 q3, d19, d1 65 vmlal.s16 q3, d31, d0 73 vqrshrn.s32 d15, q3, #14 ; >> 14 84 vmull.s16 q3, d17, d0 92 vmlal.s16 q3, d25, d0 103 vqrshrn.s32 d19, q3, #14 ; >> 14 112 vmull.s16 q3, d21, d0 [all …]
|
D | vp9_copy_neon.asm | 36 vld1.8 {q2-q3}, [r0], lr 38 vst1.8 {q2-q3}, [r2@128], r3 47 vld1.8 {q2-q3}, [r0], r1 49 vst1.8 {q2-q3}, [r2@128], r3
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
D | shortfdct_neon.asm | 56 vshl.s16 q3, q3, #3 ; (c1, d1) << 3 122 vld1.16 {q3}, [r0@128], r2 124 ; transpose q0=ip[0], q1=ip[1], q2=ip[2], q3=ip[3] 126 vtrn.32 q1, q3 ; [A1|B1] 128 vtrn.16 q2, q3 ; [A3|B3] 130 vadd.s16 q11, q0, q3 ; a1 = ip[0] + ip[3] 133 vsub.s16 q14, q0, q3 ; d1 = ip[0] - ip[3] 165 ; transpose q0=ip[0], q1=ip[4], q2=ip[8], q3=ip[12] 167 vtrn.32 q1, q3 ; q1=[A4 | B4] 169 vtrn.16 q2, q3 ; q3=[A12|B12] [all …]
|
D | fastquantizeb_neon.asm | 42 vshr.s16 q3, q1, #15 64 veor.s16 q5, q3 99 vmul.s16 q3, q7, q5 115 vst1.s16 {q2, q3}, [r9] ; store dqcoeff = x * Dequant 125 vtst.16 q3, q11, q8 ; non-zero element is set to all 1 130 vand q11, q7, q3 176 vshr.s16 q3, q1, #15 206 veor.s16 q13, q3
|
D | vp8_shortwalsh4x4_neon.asm | 73 vsub.s32 q3, q8, q9 ; d2 = a1 - d1 78 vclt.s32 q11, q3, #0 84 vsub.s32 q3, q3, q11 ; d2 += d2 < 0 89 vadd.s32 q11, q3, q15 ; d2 + 3
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_copysrcframe_func_neon.asm | 55 vld1.8 {q2, q3}, [r2]! 65 vst1.8 {q2, q3}, [r3]! 111 vld1.8 {q2, q3}, [r2]! 117 vst1.8 {q2, q3}, [r3]! 169 vld1.8 {q2, q3}, [r2]! 175 vst1.8 {q2, q3}, [r3]! 217 vld1.8 {q2, q3}, [r2]! 221 vst1.8 {q2, q3}, [r3]!
|