/external/libaom/libaom/aom_dsp/x86/

D | loopfilter_sse2.c |
    30   __m128i *q1p1, __m128i *q2p2,            in transpose_pq_14_sse2() argument
    75   *q1p1 = _mm_unpackhi_epi32(              in transpose_pq_14_sse2()
    247  __m128i q1p1, q0p0, p1p0, q1q0;          in lpf_internal_4_sse2() local
    252  q1p1 = _mm_unpacklo_epi32(*p1, *q1);     in lpf_internal_4_sse2()
    255  p1p0 = _mm_unpacklo_epi32(q0p0, q1p1);   in lpf_internal_4_sse2()
    259  flat = abs_diff(q1p1, q0p0);             in lpf_internal_4_sse2()
    290  __m128i q1p1, q0p0, p1p0, q1q0;          in lpf_internal_4_dual_sse2() local
    295  q1p1 = _mm_unpacklo_epi64(*p1, *q1);     in lpf_internal_4_dual_sse2()
    298  p1p0 = _mm_unpacklo_epi64(q0p0, q1p1);   in lpf_internal_4_dual_sse2()
    299  q1q0 = _mm_unpackhi_epi64(q0p0, q1p1);   in lpf_internal_4_dual_sse2()
    [all …]

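The lpf_internal_4_sse2() matches above pack the p1/q1 rows into one register and feed the result to abs_diff() for the filter mask. A minimal sketch of that pattern, not the libaom source: abs_diff() is rebuilt here from the saturating-subtract idiom that appears inline in the libvpx AVX2 matches below, and pack_q1p1() is an illustrative helper name.

  #include <emmintrin.h>

  /* |a - b| per byte: unsigned saturating subtraction in both directions, ORed. */
  static __m128i abs_diff(__m128i a, __m128i b) {
    return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
  }

  /* Put the 4-byte p1 row in lane 0 and the 4-byte q1 row in lane 1, mirroring
   * q1p1 = _mm_unpacklo_epi32(*p1, *q1) in the single-block path; the dual-block
   * variant at line 295 does the same with 64-bit unpacks so each half carries
   * an 8-pixel row. */
  static __m128i pack_q1p1(__m128i p1, __m128i q1) {
    return _mm_unpacklo_epi32(p1, q1);
  }
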
/external/libvpx/libvpx/vpx_dsp/x86/

D | loopfilter_avx2.c |
    23   __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1;     in vpx_lpf_horizontal_16_avx2() local
    41   q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * pitch));                      in vpx_lpf_horizontal_16_avx2()
    42   q1p1 = _mm_castps_si128(                                                 in vpx_lpf_horizontal_16_avx2()
    43       _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *)(s + 1 * pitch)));     in vpx_lpf_horizontal_16_avx2()
    44   p1q1 = _mm_shuffle_epi32(q1p1, 78);                                      in vpx_lpf_horizontal_16_avx2()
    53   _mm_or_si128(_mm_subs_epu8(q1p1, q0p0), _mm_subs_epu8(q0p0, q1p1));      in vpx_lpf_horizontal_16_avx2()
    60   _mm_or_si128(_mm_subs_epu8(q1p1, p1q1), _mm_subs_epu8(p1q1, q1p1));      in vpx_lpf_horizontal_16_avx2()
    75   _mm_or_si128(_mm_subs_epu8(q2p2, q1p1), _mm_subs_epu8(q1p1, q2p2)),      in vpx_lpf_horizontal_16_avx2()
    89   __m128i qs1ps1 = _mm_xor_si128(q1p1, t80);                               in vpx_lpf_horizontal_16_avx2()
    183  p1_16 = _mm_unpacklo_epi8(q1p1, zero);                                   in vpx_lpf_horizontal_16_avx2()
    [all …]

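Lines 41-44 of the AVX2 match above build q1p1 by loading the p1 row into the low 64 bits and the q1 row into the high 64 bits of one register, then derive p1q1 by swapping the halves (78 == _MM_SHUFFLE(1, 0, 3, 2)). A sketch of that load pattern; load_q1p1_pair() is an illustrative name, and s/pitch stand for the row pointer and stride used in the source:

  #include <emmintrin.h>

  static void load_q1p1_pair(const unsigned char *s, int pitch,
                             __m128i *q1p1, __m128i *p1q1) {
    /* p1 row -> low 64 bits. */
    __m128i v = _mm_loadl_epi64((const __m128i *)(s - 2 * pitch));
    /* q1 row -> high 64 bits, via the SSE high-half load. */
    v = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(v),
                                      (const __m64 *)(s + 1 * pitch)));
    *q1p1 = v;
    *p1q1 = _mm_shuffle_epi32(v, 78);  /* swap the two 64-bit halves */
  }
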
D | loopfilter_sse2.c |
    26   __m128i flat = abs_diff(q1p1, q0p0); \
    116  __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0;  in vpx_lpf_horizontal_4_sse2() local
    121  q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * pitch)),   in vpx_lpf_horizontal_4_sse2()
    127  p1p0 = _mm_unpacklo_epi64(q0p0, q1p1);                                   in vpx_lpf_horizontal_4_sse2()
    128  p2p1 = _mm_unpacklo_epi64(q1p1, p3p2);                                   in vpx_lpf_horizontal_4_sse2()
    129  q1q0 = _mm_unpackhi_epi64(q0p0, q1p1);                                   in vpx_lpf_horizontal_4_sse2()
    130  q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2);                in vpx_lpf_horizontal_4_sse2()
    151  __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0;  in vpx_lpf_vertical_4_sse2() local
    192  q1p1 = _mm_unpackhi_epi64(p1p0, q1q0);                                   in vpx_lpf_vertical_4_sse2()
    193  p1p0 = _mm_unpacklo_epi64(q0p0, q1p1);                                   in vpx_lpf_vertical_4_sse2()
    [all …]

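In vpx_lpf_horizontal_4_sse2() above, once the rows sit pairwise in q0p0 (q0 in the high half, p0 in the low half) and q1p1, the p-side and q-side pairs fall out of plain 64-bit unpacks (lines 127 and 129). A sketch of just that repacking; split_pq() is an illustrative name:

  #include <emmintrin.h>

  static void split_pq(__m128i q0p0, __m128i q1p1,
                       __m128i *p1p0, __m128i *q1q0) {
    *p1p0 = _mm_unpacklo_epi64(q0p0, q1p1);  /* low halves:  {p1 : p0} */
    *q1q0 = _mm_unpackhi_epi64(q0p0, q1p1);  /* high halves: {q1 : q0} */
  }
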
/external/libaom/libaom/aom_dsp/arm/

D | loopfilter_neon.c |
    159  uint8x8_t q0p0, q1p1, q2p2;                                          in lpf_14_neon() local
    226  q1p1 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p1q1)));  in lpf_14_neon()
    240  out_pq1 = vaddw_u8(out_pq1, q1p1);                                   in lpf_14_neon()
    243  out_pq0 = vaddw_u8(out_pq0, q1p1);                                   in lpf_14_neon()
    284  out_pq4 = vaddw_u8(out_pq4, q1p1);                                   in lpf_14_neon()
    286  qp_sum = vaddl_u8(q2p2, q1p1);                                       in lpf_14_neon()
    408  uint8x8_t q0p0, q1p1, q2p2;                                          in lpf_8_neon() local
    416  q1p1 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p1q1)));  in lpf_8_neon()
    424  out_pq1 = vaddw_u8(out_pq1, q1p1);                                   in lpf_8_neon()
    427  out_pq0 = vaddw_u8(out_pq0, q1p1);                                   in lpf_8_neon()

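The NEON matches above keep four p-side and four q-side pixels in one uint8x8_t (p1q1), reverse its two 32-bit lanes to get q1p1, and then use one widening add so the opposite-side tap is contributed to both halves of the running accumulator at once. A sketch of that step; add_mirrored_tap() is an illustrative name:

  #include <arm_neon.h>

  static uint16x8_t add_mirrored_tap(uint16x8_t out_pq1, uint8x8_t p1q1) {
    /* Reverse the two 32-bit lanes of p1q1 so the p- and q-side pixels trade places. */
    const uint8x8_t q1p1 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(p1q1)));
    /* Widen q1p1 to 16 bits and add it to the running pq1 accumulator. */
    return vaddw_u8(out_pq1, q1p1);
  }
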
/external/libgav1/libgav1/src/dsp/arm/

D | loop_filter_neon.cc |
    321  const uint8x8_t q1p1 = Transpose32(p1q1);    in Filter6() local
    322  sum = vaddq_u16(vaddl_u8(q0p0, q1p1), sum);  in Filter6()
    557  const uint8x8_t q1p1 = Transpose32(p1q1);    in Filter8() local
    558  sum = vaddq_u16(vaddl_u8(p1q1, q1p1), sum);  in Filter8()
    787  const uint8x8_t q1p1 = Transpose32(p1q1);    in Filter14() local
    788  sum = vaddq_u16(vaddl_u8(p3q3, q1p1), sum);  in Filter14()

/external/libgav1/libgav1/src/dsp/x86/

D | loop_filter_sse4.cc |
    915  __m128i* q0p0, __m128i* q1p1, __m128i* q2p2,               in DualTranspose8x4To4x8() argument
    947  *q1p1 = _mm_unpackhi_epi32(ww1, _mm_slli_si128(ww2, 4));   in DualTranspose8x4To4x8()
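The combining step matched above in DualTranspose8x4To4x8() is easier to read once the byte shift is decoded: _mm_slli_si128(ww2, 4) moves ww2 up by one 32-bit lane, so the unpackhi pairs lane 2 of ww1 with lane 1 of ww2 and lane 3 of ww1 with lane 2 of ww2. A sketch of only that lane plumbing; pair_hi_lanes() is an illustrative name and ww1/ww2 stand for intermediate transpose registers in the source:

  #include <emmintrin.h>

  static __m128i pair_hi_lanes(__m128i ww1, __m128i ww2) {
    /* Result lanes (32-bit): { ww1[2], ww2[1], ww1[3], ww2[2] }. */
    return _mm_unpackhi_epi32(ww1, _mm_slli_si128(ww2, 4));
  }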