
Searched refs: _mm256_srai_epi32 (Results 1 – 25 of 45) sorted by relevance

/external/libvpx/libvpx/vpx_dsp/x86/
fwd_dct32x32_impl_avx2.h
414 const __m256i s2_20_6 = _mm256_srai_epi32(s2_20_4, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
415 const __m256i s2_20_7 = _mm256_srai_epi32(s2_20_5, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
416 const __m256i s2_21_6 = _mm256_srai_epi32(s2_21_4, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
417 const __m256i s2_21_7 = _mm256_srai_epi32(s2_21_5, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
418 const __m256i s2_22_6 = _mm256_srai_epi32(s2_22_4, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
419 const __m256i s2_22_7 = _mm256_srai_epi32(s2_22_5, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
420 const __m256i s2_23_6 = _mm256_srai_epi32(s2_23_4, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
421 const __m256i s2_23_7 = _mm256_srai_epi32(s2_23_5, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
422 const __m256i s2_24_6 = _mm256_srai_epi32(s2_24_4, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
423 const __m256i s2_24_7 = _mm256_srai_epi32(s2_24_5, DCT_CONST_BITS); in FDCT32x32_2D_AVX2()
[all …]
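
Note: these forward-DCT hits are the tail end of libvpx's fdct_round_shift pattern: the s2_*_4/s2_*_5 temporaries already include the rounding constant, and _mm256_srai_epi32 then performs the arithmetic shift by DCT_CONST_BITS. A minimal sketch of the combined operation, assuming libvpx's usual DCT_CONST_BITS of 14; fdct_round_shift_avx2 is a name invented here for illustration.

    #include <immintrin.h>

    #define DCT_CONST_BITS 14
    #define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))

    /* (x + 2^(DCT_CONST_BITS-1)) >> DCT_CONST_BITS for each 32-bit lane: the
       scalar fdct_round_shift() applied lane-wise.  The real kernel keeps the
       add and the shift as separately named temporaries. */
    static inline __m256i fdct_round_shift_avx2(__m256i x) {
      const __m256i k_rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING);
      return _mm256_srai_epi32(_mm256_add_epi32(x, k_rounding), DCT_CONST_BITS);
    }
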
avg_intrin_avx2.c
159 b0 = _mm256_srai_epi32(b0, 1); in vpx_highbd_hadamard_16x16_avx2()
160 b1 = _mm256_srai_epi32(b1, 1); in vpx_highbd_hadamard_16x16_avx2()
161 b2 = _mm256_srai_epi32(b2, 1); in vpx_highbd_hadamard_16x16_avx2()
162 b3 = _mm256_srai_epi32(b3, 1); in vpx_highbd_hadamard_16x16_avx2()
200 b0 = _mm256_srai_epi32(b0, 2); in vpx_highbd_hadamard_32x32_avx2()
201 b1 = _mm256_srai_epi32(b1, 2); in vpx_highbd_hadamard_32x32_avx2()
202 b2 = _mm256_srai_epi32(b2, 2); in vpx_highbd_hadamard_32x32_avx2()
203 b3 = _mm256_srai_epi32(b3, 2); in vpx_highbd_hadamard_32x32_avx2()
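
Note: the high-bitdepth Hadamard kernels divide the accumulated sums by 2 (16x16) or 4 (32x32) to keep them within the range the rest of the pipeline expects, and the shift must be arithmetic so negative coefficients stay negative. A tiny standalone demo of the difference between _mm256_srai_epi32 and the logical _mm256_srli_epi32 (values chosen purely for illustration; compile with -mavx2):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      const __m256i v = _mm256_set1_epi32(-6);
      const __m256i a = _mm256_srai_epi32(v, 1); /* arithmetic: -6 >> 1 == -3 */
      const __m256i l = _mm256_srli_epi32(v, 1); /* logical: 0xFFFFFFFA >> 1 == 0x7FFFFFFD */
      printf("%d %u\n", _mm_cvtsi128_si32(_mm256_castsi256_si128(a)),
             (unsigned)_mm_cvtsi128_si32(_mm256_castsi256_si128(l)));
      return 0; /* prints: -3 2147483645 */
    }
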
/external/libaom/libaom/av1/encoder/x86/
av1_fwd_txfm_avx2.h
29 *in0 = _mm256_srai_epi32(temp0, cos_bit); in btf_32_avx2_type0()
34 *in1 = _mm256_srai_epi32(temp1, cos_bit); in btf_32_avx2_type0()
48 *in0 = _mm256_srai_epi32(temp0, cos_bit); in btf_32_avx2_type1()
53 *in1 = _mm256_srai_epi32(temp1, cos_bit); in btf_32_avx2_type1()
68 *in0 = _mm256_srai_epi32(temp0, cos_bit); in btf_32_avx2_type0_new()
73 *in1 = _mm256_srai_epi32(temp1, cos_bit); in btf_32_avx2_type0_new()
88 *in0 = _mm256_srai_epi32(temp0, cos_bit); in btf_32_avx2_type1_new()
93 *in1 = _mm256_srai_epi32(temp1, cos_bit); in btf_32_avx2_type1_new()
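
Note: each of these hits is the final shift of a 32-bit butterfly: the two inputs are multiplied by a pair of cosine weights in Q(cos_bit) fixed point, a rounding term is added, and _mm256_srai_epi32 scales the sums back down. A hedged sketch of such a butterfly (btf32_sketch is an illustrative name; the argument order and sign conventions of libaom's btf_32_avx2_type0/type1 helpers may differ):

    #include <immintrin.h>
    #include <stdint.h>

    /* out0 = round((w0*in0 + w1*in1) / 2^cos_bit)
       out1 = round((w0*in1 - w1*in0) / 2^cos_bit) */
    static inline void btf32_sketch(int32_t w0, int32_t w1, __m256i *in0,
                                    __m256i *in1, int cos_bit) {
      const __m256i ww0 = _mm256_set1_epi32(w0);
      const __m256i ww1 = _mm256_set1_epi32(w1);
      const __m256i rnd = _mm256_set1_epi32(1 << (cos_bit - 1));
      const __m256i a = *in0, b = *in1;
      const __m256i t0 = _mm256_add_epi32(_mm256_mullo_epi32(a, ww0),
                                          _mm256_mullo_epi32(b, ww1));
      const __m256i t1 = _mm256_sub_epi32(_mm256_mullo_epi32(b, ww0),
                                          _mm256_mullo_epi32(a, ww1));
      *in0 = _mm256_srai_epi32(_mm256_add_epi32(t0, rnd), cos_bit);
      *in1 = _mm256_srai_epi32(_mm256_add_epi32(t1, rnd), cos_bit);
    }
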
highbd_fwd_txfm_avx2.c
88 in[0] = _mm256_srai_epi32(in[0], shift); in col_txfm_8x8_rounding()
89 in[1] = _mm256_srai_epi32(in[1], shift); in col_txfm_8x8_rounding()
90 in[2] = _mm256_srai_epi32(in[2], shift); in col_txfm_8x8_rounding()
91 in[3] = _mm256_srai_epi32(in[3], shift); in col_txfm_8x8_rounding()
92 in[4] = _mm256_srai_epi32(in[4], shift); in col_txfm_8x8_rounding()
93 in[5] = _mm256_srai_epi32(in[5], shift); in col_txfm_8x8_rounding()
94 in[6] = _mm256_srai_epi32(in[6], shift); in col_txfm_8x8_rounding()
95 in[7] = _mm256_srai_epi32(in[7], shift); in col_txfm_8x8_rounding()
189 in[stride * i] = _mm256_srai_epi32(in[stride * i], bit); in round_shift_32_8xn_avx2()
221 x = _mm256_srai_epi32(x, bit); in av1_half_btf_avx2()
[all …]
pickrst_avx2.c
535 _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift); in av1_lowbd_pixel_proj_error_avx2()
537 _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift); in av1_lowbd_pixel_proj_error_avx2()
580 _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift); in av1_lowbd_pixel_proj_error_avx2()
582 _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift); in av1_lowbd_pixel_proj_error_avx2()
921 _mm256_srai_epi32(_mm256_add_epi32(vl, rounding), shift); in av1_highbd_pixel_proj_error_avx2()
923 _mm256_srai_epi32(_mm256_add_epi32(vh, rounding), shift); in av1_highbd_pixel_proj_error_avx2()
998 _mm256_srai_epi32(_mm256_add_epi32(vh, rounding), shift); in av1_highbd_pixel_proj_error_avx2()
1000 _mm256_srai_epi32(_mm256_add_epi32(vl, rounding), shift); in av1_highbd_pixel_proj_error_avx2()
/external/libaom/libaom/av1/common/x86/
highbd_inv_txfm_avx2.c
58 in[0] = _mm256_srai_epi32(in[0], shift); in round_shift_4x4_avx2()
59 in[1] = _mm256_srai_epi32(in[1], shift); in round_shift_4x4_avx2()
60 in[2] = _mm256_srai_epi32(in[2], shift); in round_shift_4x4_avx2()
61 in[3] = _mm256_srai_epi32(in[3], shift); in round_shift_4x4_avx2()
247 x = _mm256_srai_epi32(x, bit); in half_btf_0_avx2()
260 x = _mm256_srai_epi32(x, bit); in half_btf_avx2()
460 x = _mm256_srai_epi32(x, bit); in idct32_low1_avx2()
1172 in[0] = _mm256_srai_epi32(in[0], bit); in idct16_low1_avx2()
1271 u[0] = _mm256_srai_epi32(u[0], bit); in idct16_low8_avx2()
1295 u[5] = _mm256_srai_epi32(u[5], bit); in idct16_low8_avx2()
[all …]
highbd_wiener_convolve_avx2.c
118 const __m256i res_even = _mm256_srai_epi32( in av1_highbd_wiener_convolve_add_src_avx2()
123 const __m256i res_odd = _mm256_srai_epi32( in av1_highbd_wiener_convolve_add_src_avx2()
228 const __m256i res_lo_round = _mm256_srai_epi32( in av1_highbd_wiener_convolve_add_src_avx2()
230 const __m256i res_hi_round = _mm256_srai_epi32( in av1_highbd_wiener_convolve_add_src_avx2()
selfguided_avx2.c
344 __m256i w = _mm256_srai_epi32(_mm256_add_epi32(v, rounding), in final_filter()
523 _mm256_srai_epi32(_mm256_add_epi32(v, rounding0), in final_filter_fast()
540 _mm256_srai_epi32(_mm256_add_epi32(v, rounding1), in final_filter_fast()
696 const __m256i w_0 = _mm256_srai_epi32( in av1_apply_selfguided_restoration_avx2()
698 const __m256i w_1 = _mm256_srai_epi32( in av1_apply_selfguided_restoration_avx2()
/external/libaom/libaom/aom_dsp/x86/
obmc_variance_avx2.c
56 const __m256i v_sign_d = _mm256_srai_epi32(v_diff0_d, 31); in obmc_variance_w8n()
59 const __m256i v_rdiff0_d = _mm256_srai_epi32(v_tmp_d, 12); in obmc_variance_w8n()
119 const __m256i v_sign0_d = _mm256_srai_epi32(v_diff0_d, 31); in obmc_variance_w16n()
120 const __m256i v_sign1_d = _mm256_srai_epi32(v_diff1_d, 31); in obmc_variance_w16n()
127 const __m256i v_rdiff0_d = _mm256_srai_epi32(v_tmp0_d, 12); in obmc_variance_w16n()
128 const __m256i v_rdiff2_d = _mm256_srai_epi32(v_tmp1_d, 12); in obmc_variance_w16n()
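
Note: shifting a 32-bit lane right by 31 arithmetic positions broadcasts its sign bit, giving 0 for non-negative lanes and -1 (all ones) for negative ones; the OBMC variance kernels use that mask to round the weighted difference in a sign-aware way. The same mask drives the classic branch-free absolute value, sketched below purely as an illustration of the idiom (this is not the exact OBMC rounding code):

    #include <immintrin.h>

    /* abs(x) per 32-bit lane via the identity (x ^ sign) - sign, where
       sign = x >> 31 is all ones for negative x.  Wraps at INT32_MIN, just
       like _mm256_abs_epi32. */
    static inline __m256i abs_epi32_via_sign_mask(__m256i x) {
      const __m256i sign = _mm256_srai_epi32(x, 31);
      return _mm256_sub_epi32(_mm256_xor_si256(x, sign), sign);
    }
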
txfm_common_avx2.h
43 __m256i c0 = _mm256_srai_epi32(a0, cos_bit); in btf_16_w16_avx2()
44 __m256i c1 = _mm256_srai_epi32(a1, cos_bit); in btf_16_w16_avx2()
45 __m256i d0 = _mm256_srai_epi32(b0, cos_bit); in btf_16_w16_avx2()
46 __m256i d1 = _mm256_srai_epi32(b1, cos_bit); in btf_16_w16_avx2()
253 return _mm256_srai_epi32(tmp, bit); in av1_round_shift_32_avx2()
300 return _mm256_srai_epi32(b, NewSqrt2Bits); in scale_round_avx2()
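
Note: the txfm_common_avx2.h hits at lines 43-46 come from the 16-bit butterfly variant: inputs are interleaved into (x, y) pairs, vpmaddwd forms w_a*x + w_b*y in 32-bit lanes, _mm256_srai_epi32 applies the cos_bit round-shift, and _mm256_packs_epi32 narrows the results back to 16 bits with saturation; line 300's scale_round_avx2 uses the same round-shift to apply libaom's NewSqrt2 fixed-point scale. A hedged sketch of the 16-bit butterfly (btf16_sketch is an illustrative name; the real helper's weight layout and signs may differ):

    #include <immintrin.h>

    /* w0 should hold repeated (w_a, w_b) 16-bit pairs and w1 the pairs for the
       second output (e.g. (w_b, -w_a)); each vpmaddwd result is then the dot
       product of one (x, y) pair with one weight pair. */
    static inline void btf16_sketch(__m256i w0, __m256i w1, __m256i *in0,
                                    __m256i *in1, int cos_bit) {
      const __m256i rnd = _mm256_set1_epi32(1 << (cos_bit - 1));
      const __m256i t0 = _mm256_unpacklo_epi16(*in0, *in1);
      const __m256i t1 = _mm256_unpackhi_epi16(*in0, *in1);
      const __m256i c0 = _mm256_srai_epi32(
          _mm256_add_epi32(_mm256_madd_epi16(t0, w0), rnd), cos_bit);
      const __m256i c1 = _mm256_srai_epi32(
          _mm256_add_epi32(_mm256_madd_epi16(t1, w0), rnd), cos_bit);
      const __m256i d0 = _mm256_srai_epi32(
          _mm256_add_epi32(_mm256_madd_epi16(t0, w1), rnd), cos_bit);
      const __m256i d1 = _mm256_srai_epi32(
          _mm256_add_epi32(_mm256_madd_epi16(t1, w1), rnd), cos_bit);
      *in0 = _mm256_packs_epi32(c0, c1);
      *in1 = _mm256_packs_epi32(d0, d1);
    }
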
avg_intrin_avx2.c
400 b0 = _mm256_srai_epi32(b0, 1); in aom_highbd_hadamard_16x16_avx2()
401 b1 = _mm256_srai_epi32(b1, 1); in aom_highbd_hadamard_16x16_avx2()
402 b2 = _mm256_srai_epi32(b2, 1); in aom_highbd_hadamard_16x16_avx2()
403 b3 = _mm256_srai_epi32(b3, 1); in aom_highbd_hadamard_16x16_avx2()
441 b0 = _mm256_srai_epi32(b0, 2); in aom_highbd_hadamard_32x32_avx2()
442 b1 = _mm256_srai_epi32(b1, 2); in aom_highbd_hadamard_32x32_avx2()
443 b2 = _mm256_srai_epi32(b2, 2); in aom_highbd_hadamard_32x32_avx2()
444 b3 = _mm256_srai_epi32(b3, 2); in aom_highbd_hadamard_32x32_avx2()
convolve_avx2.h
395 d = _mm256_srai_epi32(d, 1); in add_store_aligned_256()
414 const __m256i res_lo = _mm256_srai_epi32(wt_res_lo, DIST_PRECISION_BITS); in comp_avg()
415 const __m256i res_hi = _mm256_srai_epi32(wt_res_hi, DIST_PRECISION_BITS); in comp_avg()
445 res = _mm256_srai_epi32(wt_res, DIST_PRECISION_BITS); in highbd_comp_avg()
448 res = _mm256_srai_epi32(wt_res, 1); in highbd_comp_avg()
457 const __m256i res_round = _mm256_srai_epi32( in highbd_convolve_rounding()
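
Note: convolve_avx2.h uses the shift in two compound-prediction paths: averaging two predictors equally needs only a shift by 1 (lines 395 and 448), while distance-weighted averaging multiplies each predictor by a weight and shifts by DIST_PRECISION_BITS, which works because the two weights sum to 1 << DIST_PRECISION_BITS (4 in current libaom, an assumption here). A simplified 32-bit sketch of the weighted path (the real comp_avg works on packed 16-bit data via vpmaddwd, and its rounding offsets are folded in elsewhere):

    #include <immintrin.h>

    #define DIST_PRECISION_BITS 4 /* assumed libaom value; weights sum to 16 */

    /* (w0*p0 + w1*p1) >> DIST_PRECISION_BITS, assuming w0 + w1 == 16. */
    static inline __m256i dist_wtd_avg_sketch(__m256i p0, __m256i p1, int w0,
                                              int w1) {
      const __m256i sum =
          _mm256_add_epi32(_mm256_mullo_epi32(p0, _mm256_set1_epi32(w0)),
                           _mm256_mullo_epi32(p1, _mm256_set1_epi32(w1)));
      return _mm256_srai_epi32(sum, DIST_PRECISION_BITS);
    }
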
blend_a64_mask_avx2.c
40 _mm256_srai_epi32(_mm256_sub_epi32(res0_lo, *v_round_offset), shift); in blend_a64_d16_mask_w16_avx2()
42 _mm256_srai_epi32(_mm256_sub_epi32(res0_hi, *v_round_offset), shift); in blend_a64_d16_mask_w16_avx2()
68 _mm256_srai_epi32(_mm256_sub_epi32(res0_lo, *v_round_offset), shift); in blend_a64_d16_mask_w32_avx2()
70 _mm256_srai_epi32(_mm256_sub_epi32(res0_hi, *v_round_offset), shift); in blend_a64_d16_mask_w32_avx2()
72 _mm256_srai_epi32(_mm256_sub_epi32(res1_lo, *v_round_offset), shift); in blend_a64_d16_mask_w32_avx2()
74 _mm256_srai_epi32(_mm256_sub_epi32(res1_hi, *v_round_offset), shift); in blend_a64_d16_mask_w32_avx2()
942 _mm256_srai_epi32(_mm256_sub_epi32(sumh, *round_offset), shift); in highbd_blend_a64_d16_mask_w4_avx2()
944 _mm256_srai_epi32(_mm256_sub_epi32(suml, *round_offset), shift); in highbd_blend_a64_d16_mask_w4_avx2()
1074 _mm256_srai_epi32(_mm256_sub_epi32(sumah, *round_offset), shift); in highbd_blend_a64_d16_mask_w8_avx2()
1076 _mm256_srai_epi32(_mm256_sub_epi32(sumal, *round_offset), shift); in highbd_blend_a64_d16_mask_w8_avx2()
[all …]
masked_sad_intrin_avx2.c
229 pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const), in highbd_masked_sad8xh_avx2()
235 pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const), in highbd_masked_sad8xh_avx2()
286 pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const), in highbd_masked_sad16xh_avx2()
292 pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const), in highbd_masked_sad16xh_avx2()
highbd_convolve_avx2.c
489 *y = _mm256_srai_epi32(a, CONV8_ROUNDING_BITS); in filter_8x1_pixels()
600 _mm256_srai_epi32(_mm256_add_epi32(res, rounding), CONV8_ROUNDING_BITS); in aom_highbd_filter_block1d4_h4_avx2()
626 _mm256_srai_epi32(_mm256_add_epi32(res, rounding), CONV8_ROUNDING_BITS); in aom_highbd_filter_block1d4_h4_avx2()
676 res_even = _mm256_srai_epi32(_mm256_add_epi32(res_even, rounding), in aom_highbd_filter_block1d8_h4_avx2()
684 res_odd = _mm256_srai_epi32(_mm256_add_epi32(res_odd, rounding), in aom_highbd_filter_block1d8_h4_avx2()
714 _mm256_srai_epi32(_mm256_add_epi32(res, rounding), CONV8_ROUNDING_BITS); in aom_highbd_filter_block1d8_h4_avx2()
796 *y0 = _mm256_srai_epi32(x0, CONV8_ROUNDING_BITS); in filter_16_2t_pixels()
797 *y1 = _mm256_srai_epi32(x1, CONV8_ROUNDING_BITS); in filter_16_2t_pixels()
805 *y0 = _mm256_srai_epi32(x0, CONV8_ROUNDING_BITS); in filter_8x1_2t_pixels()
/external/XNNPACK/src/qs8-vaddc/gen/
minmax-avx2-mul32-ld64-x32.c
48 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
49 …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
50 …= _mm256_add_epi32(_mm256_and_si256(vaccGHIJKLMN, vremainder_mask), _mm256_srai_epi32(vaccGHIJKLMN… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
51 …= _mm256_add_epi32(_mm256_and_si256(vaccOPQRSTUV, vremainder_mask), _mm256_srai_epi32(vaccOPQRSTUV… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
78 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
minmax-avx2-mul32-ld64-x24.c
46 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
47 …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
48 …= _mm256_add_epi32(_mm256_and_si256(vaccGHIJKLMN, vremainder_mask), _mm256_srai_epi32(vaccGHIJKLMN… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
74 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
minmax-avx2-mul32-ld64-x16.c
44 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
45 …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
66 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
minmax-avx2-mul32-ld64-x8.c
42 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
61 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
/external/XNNPACK/src/qs8-vadd/gen/
minmax-avx2-mul32-ld64-x32.c
57 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
58 …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
59 …= _mm256_add_epi32(_mm256_and_si256(vaccGHIJKLMN, vremainder_mask), _mm256_srai_epi32(vaccGHIJKLMN… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
60 …= _mm256_add_epi32(_mm256_and_si256(vaccOPQRSTUV, vremainder_mask), _mm256_srai_epi32(vaccOPQRSTUV… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
91 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
minmax-avx2-mul32-ld64-x24.c
53 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
54 …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
55 …= _mm256_add_epi32(_mm256_and_si256(vaccGHIJKLMN, vremainder_mask), _mm256_srai_epi32(vaccGHIJKLMN… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
85 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
minmax-avx2-mul32-ld64-x16.c
49 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
50 …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
75 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
minmax-avx2-mul32-ld64-x8.c
45 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
67 …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
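
Note: in these XNNPACK QS8 vadd/vaddc kernels the accumulator has to be divided by a power of two while rounding to nearest with ties away from zero. A plain arithmetic shift rounds toward negative infinity, so the kernels form a corrected remainder — the bits about to be shifted out, minus one on negative lanes via _mm256_srai_epi32(vacc, 31) — and bump the truncated quotient wherever that remainder exceeds half the divisor. The listing truncates these lines, so the sketch below reconstructs the rest of the pattern rather than copying it (rounding_shift_sketch is an illustrative name; the real kernels keep the mask and threshold in precomputed parameter structures and use _mm256_sra_epi32 with a runtime count):

    #include <immintrin.h>

    /* Round-to-nearest, ties-away-from-zero division of each 32-bit lane by
       2^shift. */
    static inline __m256i rounding_shift_sketch(__m256i acc, int shift) {
      const __m256i mask = _mm256_set1_epi32((1 << shift) - 1);
      const __m256i threshold = _mm256_set1_epi32(((1 << shift) - 1) >> 1);
      /* Remainder of the truncating shift, adjusted down by 1 on negative lanes. */
      const __m256i rem = _mm256_add_epi32(_mm256_and_si256(acc, mask),
                                           _mm256_srai_epi32(acc, 31));
      /* Truncating shift, then +1 wherever the discarded part rounds the result
         up (subtracting the all-ones compare mask adds 1). */
      return _mm256_sub_epi32(_mm256_srai_epi32(acc, shift),
                              _mm256_cmpgt_epi32(rem, threshold));
    }
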
/external/libaom/libaom/aom_dsp/simd/
v256_intrinsics_x86.h
221 _mm256_packs_epi32(_mm256_srai_epi32(b, 16), _mm256_srai_epi32(a, 16)), in v256_unziphi_16()
326 return _mm256_srai_epi32( in v256_unpacklo_s16_s32()
339 return _mm256_srai_epi32( in v256_unpackhi_s16_s32()
694 #define v256_shr_n_s32(a, c) _mm256_srai_epi32(a, c)
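
Note: in the aom_dsp SIMD wrappers the shift by 16 serves two related tricks: v256_unziphi_16 pulls the upper (odd-position) 16-bit element out of every 32-bit pair with its sign intact before re-packing, and v256_unpacklo/hi_s16_s32 sign-extend 16-bit values that were interleaved into the upper halves of 32-bit lanes. A sketch of both idioms (sext_lo_16_to_32 and unziphi_16_sketch are illustrative names; the real wrappers nest these calls inside additional ordering fix-ups, which are omitted here):

    #include <immintrin.h>

    /* Sign-extend 16-bit elements to 32 bits without vpmovsxwd: interleave them
       into the upper half of each 32-bit lane, then shift the sign bit back
       down.  Like all AVX2 unpacks this operates per 128-bit lane, so the
       result holds the low four 16-bit elements of each half of 'a'. */
    static inline __m256i sext_lo_16_to_32(__m256i a) {
      const __m256i hi = _mm256_unpacklo_epi16(_mm256_setzero_si256(), a);
      return _mm256_srai_epi32(hi, 16);
    }

    /* Keep only the upper 16-bit element of every 32-bit pair: the arithmetic
       shift isolates it with the right sign, and the saturating pack narrows
       the pairs back to 16 bits (again per 128-bit lane). */
    static inline __m256i unziphi_16_sketch(__m256i a, __m256i b) {
      return _mm256_packs_epi32(_mm256_srai_epi32(a, 16),
                                _mm256_srai_epi32(b, 16));
    }
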
/external/gemmlowp/fixedpoint/
fixedpoint_avx.h
144 return _mm256_srai_epi32(a, offset);
297 rounded_half_sum = _mm256_srai_epi32(Add(sum, one), 1);
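
Note: line 144 appears to be gemmlowp's arithmetic ShiftRight specialization for __m256i, and line 297 is its rounding half-sum: adding 1 before shifting right by one computes (a + b + 1) >> 1, the average with halves rounded toward +infinity (e.g. (3 + 4 + 1) >> 1 == 4 and (-3 - 4 + 1) >> 1 == -3). A minimal sketch; gemmlowp's full RoundingHalfSum also guards the a + b sum against overflow, which this version deliberately does not:

    #include <immintrin.h>

    /* (a + b + 1) >> 1 per 32-bit lane: rounding average, halves toward +inf.
       Assumes a + b does not overflow int32. */
    static inline __m256i rounding_half_sum_sketch(__m256i a, __m256i b) {
      const __m256i one = _mm256_set1_epi32(1);
      return _mm256_srai_epi32(_mm256_add_epi32(_mm256_add_epi32(a, b), one), 1);
    }
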
