Home
last modified time | relevance | path

Searched refs:sum_lo (Results 1 – 21 of 21) sorted by relevance

/external/libgav1/libgav1/src/dsp/arm/
Dintra_edge_neon.cc78 uint16x8_t sum_lo = vaddl_u8(vget_low_u8(src_0), vget_low_u8(src_2)); in IntraEdgeFilter_NEON() local
79 sum_lo = vmulq_n_u16(sum_lo, kKernelsNEON[kernel_index][0]); in IntraEdgeFilter_NEON()
80 sum_lo = vmlal_u8(sum_lo, vget_low_u8(src_1), krn1); in IntraEdgeFilter_NEON()
86 vcombine_u8(vrshrn_n_u16(sum_lo, 4), vrshrn_n_u16(sum_hi, 4)); in IntraEdgeFilter_NEON()
102 uint16x8_t sum_lo = vaddl_u8(vget_low_u8(src_0), vget_low_u8(src_2)); in IntraEdgeFilter_NEON() local
103 sum_lo = vmulq_n_u16(sum_lo, kKernelsNEON[kernel_index][0]); in IntraEdgeFilter_NEON()
104 sum_lo = vmlal_u8(sum_lo, vget_low_u8(src_1), krn1); in IntraEdgeFilter_NEON()
110 vcombine_u8(vrshrn_n_u16(sum_lo, 4), vrshrn_n_u16(sum_hi, 4)); in IntraEdgeFilter_NEON()
158 uint16x8_t sum_lo = in IntraEdgeFilter_NEON() local
162 sum_lo = vaddq_u16(sum_lo, vshlq_n_u16(sum_123_lo, 2)); in IntraEdgeFilter_NEON()
[all …]
Dconvolve_neon.cc425 int32x4_t sum_lo, sum_hi; in SimpleSum2DVerticalTaps() local
427 sum_lo = vmull_lane_s16(vget_low_s16(src[0]), taps_lo, 0); in SimpleSum2DVerticalTaps()
429 sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[1]), taps_lo, 1); in SimpleSum2DVerticalTaps()
431 sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[2]), taps_lo, 2); in SimpleSum2DVerticalTaps()
433 sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[3]), taps_lo, 3); in SimpleSum2DVerticalTaps()
436 sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[4]), taps_hi, 0); in SimpleSum2DVerticalTaps()
438 sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[5]), taps_hi, 1); in SimpleSum2DVerticalTaps()
440 sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[6]), taps_hi, 2); in SimpleSum2DVerticalTaps()
442 sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[7]), taps_hi, 3); in SimpleSum2DVerticalTaps()
445 sum_lo = vmull_lane_s16(vget_low_s16(src[0]), taps_lo, 1); in SimpleSum2DVerticalTaps()
[all …]
/external/libgav1/libgav1/src/dsp/x86/
Dintra_edge_sse4.cc222 __m128i sum_lo = _mm_sub_epi16(_mm_alignr_epi8(src9_hi, src9_lo, 2), src_lo); in IntraEdgeUpsampler_SSE4_1() local
223 sum_lo = _mm_add_epi16(sum_lo, _mm_alignr_epi8(src9_hi, src9_lo, 4)); in IntraEdgeUpsampler_SSE4_1()
224 sum_lo = _mm_sub_epi16(sum_lo, _mm_alignr_epi8(src_hi, src_lo, 6)); in IntraEdgeUpsampler_SSE4_1()
225 sum_lo = RightShiftWithRounding_S16(sum_lo, 4); in IntraEdgeUpsampler_SSE4_1()
226 const __m128i result_lo = _mm_unpacklo_epi8(_mm_packus_epi16(sum_lo, sum_lo), in IntraEdgeUpsampler_SSE4_1()
Dconvolve_sse4.cc1001 __m128i sum_lo = _mm_madd_epi16(src_lo_01, taps[0]); in Sum2DVerticalTaps() local
1006 sum_lo = _mm_add_epi32(sum_lo, _mm_madd_epi16(src_lo_23, taps[1])); in Sum2DVerticalTaps()
1012 sum_lo = _mm_add_epi32(sum_lo, _mm_madd_epi16(src_lo_45, taps[2])); in Sum2DVerticalTaps()
1018 sum_lo = _mm_add_epi32(sum_lo, _mm_madd_epi16(src_lo_67, taps[3])); in Sum2DVerticalTaps()
1024 RightShiftWithRounding_S32(sum_lo, kInterRoundBitsCompoundVertical - 1), in Sum2DVerticalTaps()
1029 RightShiftWithRounding_S32(sum_lo, kInterRoundBitsVertical - 1), in Sum2DVerticalTaps()
1039 __m128i sum_lo = _mm_madd_epi16(src_lo_01, taps_lo[0]); in Sum2DVerticalTaps4x2() local
1044 sum_lo = _mm_add_epi32(sum_lo, _mm_madd_epi16(src_lo_23, taps_lo[1])); in Sum2DVerticalTaps4x2()
1050 sum_lo = _mm_add_epi32(sum_lo, _mm_madd_epi16(src_lo_45, taps_lo[2])); in Sum2DVerticalTaps4x2()
1056 sum_lo = _mm_add_epi32(sum_lo, _mm_madd_epi16(src_lo_67, taps_lo[3])); in Sum2DVerticalTaps4x2()
[all …]
Dconvolve_sse4.inc164 __m128i sum_lo = _mm_madd_epi16(_mm_unpacklo_epi16(src[0], src[1]), taps[0]);
171 sum_lo = _mm_add_epi32(sum_lo, madd_lo);
176 sum_lo = _mm_add_epi32(sum_lo, madd_lo);
181 sum_lo = _mm_add_epi32(sum_lo, madd_lo);
189 RightShiftWithRounding_S32(sum_lo, kInterRoundBitsCompoundVertical - 1),
195 RightShiftWithRounding_S32(sum_lo, kInterRoundBitsVertical - 1),
Dconvolve_avx2.cc428 __m256i sum_lo = in SimpleSum2DVerticalTaps() local
437 sum_lo = _mm256_add_epi32(sum_lo, madd_lo); in SimpleSum2DVerticalTaps()
444 sum_lo = _mm256_add_epi32(sum_lo, madd_lo); in SimpleSum2DVerticalTaps()
451 sum_lo = _mm256_add_epi32(sum_lo, madd_lo); in SimpleSum2DVerticalTaps()
459 RightShiftWithRounding_S32(sum_lo, kInterRoundBitsCompoundVertical - 1), in SimpleSum2DVerticalTaps()
465 RightShiftWithRounding_S32(sum_lo, kInterRoundBitsVertical - 1), in SimpleSum2DVerticalTaps()
Dloop_restoration_10bit_avx2.cc1316 const __m128i sum_lo = _mm_unpacklo_epi16(b, _mm_setzero_si128()); in CalculateMa() local
1318 const __m128i z0 = CalculateMa<n>(sum_lo, VrshrU32(sum_sq[0], 4), scale); in CalculateMa()
1347 const __m256i sum_lo = _mm256_unpacklo_epi16(b, _mm256_setzero_si256()); in CalculateMa() local
1349 const __m256i z0 = CalculateMa<n>(sum_lo, VrshrU32(sum_sq[0], 4), scale); in CalculateMa()
Dloop_restoration_avx2.cc1391 const __m128i sum_lo = _mm_unpacklo_epi16(sum, _mm_setzero_si128()); in CalculateMa() local
1393 const __m128i z0 = CalculateMa<n>(sum_lo, sum_sq[0], scale); in CalculateMa()
1421 const __m256i sum_lo = _mm256_unpacklo_epi16(sum, _mm256_setzero_si256()); in CalculateMa() local
1423 const __m256i z0 = CalculateMa<n>(sum_lo, sum_sq[0], scale); in CalculateMa()
Dloop_restoration_10bit_sse4.cc1017 const __m128i sum_lo = _mm_unpacklo_epi16(b, _mm_setzero_si128()); in CalculateMa() local
1019 const __m128i z0 = CalculateMa<n>(sum_lo, VrshrU32(sum_sq[0], 4), scale); in CalculateMa()
Dloop_restoration_sse4.cc1158 const __m128i sum_lo = _mm_unpacklo_epi16(sum, _mm_setzero_si128()); in CalculateMa() local
1160 const __m128i z0 = CalculateMa<n>(sum_lo, sum_sq[0], scale); in CalculateMa()
/external/libvpx/libvpx/vp8/common/x86/
Dbilinear_filter_sse2.c56 const __m128i sum_lo = _mm_add_epi16(a_lo_filtered, b_lo_filtered); in horizontal_16x16() local
59 const __m128i compensated_lo = _mm_add_epi16(sum_lo, round_factor); in horizontal_16x16()
108 const __m128i sum_lo = in vertical_16x16() local
113 const __m128i compensated_lo = _mm_add_epi16(sum_lo, round_factor); in vertical_16x16()
/external/libvpx/libvpx/vp9/common/arm/neon/
Dvp9_highbd_iht16x16_add_neon.c122 const int64x2x2_t sum_lo = vaddq_s64_dual(in0[0], in1[0]); in highbd_add_dct_const_round_shift_low_8() local
126 out_lo.val[0] = vrshrn_n_s64(sum_lo.val[0], DCT_CONST_BITS); in highbd_add_dct_const_round_shift_low_8()
127 out_lo.val[1] = vrshrn_n_s64(sum_lo.val[1], DCT_CONST_BITS); in highbd_add_dct_const_round_shift_low_8()
Dvp9_highbd_iht8x8_add_neon.c72 const int64x2_t sum_lo = vaddq_s64(in0[0], in1[0]); in highbd_add_dct_const_round_shift_low_8() local
74 const int32x2_t out_lo = vrshrn_n_s64(sum_lo, DCT_CONST_BITS); in highbd_add_dct_const_round_shift_low_8()
/external/libaom/libaom/aom_dsp/x86/
Dvariance_avx2.c86 const __m256i sum_lo = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(sum)); in sum_to_32bit_avx2() local
89 return _mm256_add_epi32(sum_lo, sum_hi); in sum_to_32bit_avx2()
Dvariance_sse2.c64 const __m128i sum_lo = _mm_srai_epi32(_mm_unpacklo_epi16(sum, sum), 16); in sum_to_32bit_sse2() local
66 return _mm_add_epi32(sum_lo, sum_hi); in sum_to_32bit_sse2()
Dhighbd_intrapred_sse2.c426 const __m128i sum_lo = dc_sum_8(ref); in dc_sum_16() local
428 return _mm_add_epi16(sum_lo, sum_hi); in dc_sum_16()
/external/libvpx/libvpx/vpx_dsp/x86/
Dvariance_sse2.c92 const __m128i sum_lo = _mm_srai_epi32(_mm_unpacklo_epi16(sum, sum), 16); in sum_to_32bit_sse2() local
94 return _mm_add_epi32(sum_lo, sum_hi); in sum_to_32bit_sse2()
Dhighbd_intrapred_intrin_sse2.c270 const __m128i sum_lo = dc_sum_8(ref); in dc_sum_16() local
272 return _mm_add_epi16(sum_lo, sum_hi); in dc_sum_16()
Dvariance_avx2.c95 const __m256i sum_lo = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(sum)); in sum_to_32bit_avx2() local
98 return _mm256_add_epi32(sum_lo, sum_hi); in sum_to_32bit_avx2()
/external/libvpx/libvpx/vp9/encoder/x86/
Dhighbd_temporal_filter_sse4.c80 const __m128i sum_lo = _mm_unpacklo_epi32(*sum, zero); in highbd_average_4() local
85 const __m128i mul_lo = _mm_mul_epu32(sum_lo, const_lo); in highbd_average_4()
/external/libaom/libaom/av1/encoder/x86/
Dtemporal_filter_sse4.c1112 const __m128i sum_lo = _mm_unpacklo_epi32(*sum, zero); in highbd_average_4() local
1117 const __m128i mul_lo = _mm_mul_epu32(sum_lo, const_lo); in highbd_average_4()