/external/libvpx/libvpx/vpx_dsp/x86/ |
D | fwd_dct32x32_impl_avx2.h |
      414  const __m256i s2_20_6 = _mm256_srai_epi32(s2_20_4, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      415  const __m256i s2_20_7 = _mm256_srai_epi32(s2_20_5, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      416  const __m256i s2_21_6 = _mm256_srai_epi32(s2_21_4, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      417  const __m256i s2_21_7 = _mm256_srai_epi32(s2_21_5, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      418  const __m256i s2_22_6 = _mm256_srai_epi32(s2_22_4, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      419  const __m256i s2_22_7 = _mm256_srai_epi32(s2_22_5, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      420  const __m256i s2_23_6 = _mm256_srai_epi32(s2_23_4, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      421  const __m256i s2_23_7 = _mm256_srai_epi32(s2_23_5, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      422  const __m256i s2_24_6 = _mm256_srai_epi32(s2_24_4, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      423  const __m256i s2_24_7 = _mm256_srai_epi32(s2_24_5, DCT_CONST_BITS);  in FDCT32x32_2D_AVX2()
      [all …]
|
D | avg_intrin_avx2.c |
      159  b0 = _mm256_srai_epi32(b0, 1);  in vpx_highbd_hadamard_16x16_avx2()
      160  b1 = _mm256_srai_epi32(b1, 1);  in vpx_highbd_hadamard_16x16_avx2()
      161  b2 = _mm256_srai_epi32(b2, 1);  in vpx_highbd_hadamard_16x16_avx2()
      162  b3 = _mm256_srai_epi32(b3, 1);  in vpx_highbd_hadamard_16x16_avx2()
      200  b0 = _mm256_srai_epi32(b0, 2);  in vpx_highbd_hadamard_32x32_avx2()
      201  b1 = _mm256_srai_epi32(b1, 2);  in vpx_highbd_hadamard_32x32_avx2()
      202  b2 = _mm256_srai_epi32(b2, 2);  in vpx_highbd_hadamard_32x32_avx2()
      203  b3 = _mm256_srai_epi32(b3, 2);  in vpx_highbd_hadamard_32x32_avx2()
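The two highbd Hadamard kernels above end with a bare arithmetic right shift that rescales the 32-bit transform sums (by 1 for 16x16, by 2 for 32x32) so they fit back into the coefficient range. A minimal sketch of that normalization step; the helper name and the array form are illustrative, not libvpx's code:

    #include <immintrin.h>

    /* Rescale four vectors of 32-bit Hadamard sums by an arithmetic shift of 1,
     * as the 16x16 kernels above do; the 32x32 kernels shift by 2 instead. */
    static void normalize_hadamard_16x16(__m256i b[4]) {
      b[0] = _mm256_srai_epi32(b[0], 1);
      b[1] = _mm256_srai_epi32(b[1], 1);
      b[2] = _mm256_srai_epi32(b[2], 1);
      b[3] = _mm256_srai_epi32(b[3], 1);
    }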
|
/external/libaom/libaom/av1/encoder/x86/ |
D | av1_fwd_txfm_avx2.h |
       29  *in0 = _mm256_srai_epi32(temp0, cos_bit);  in btf_32_avx2_type0()
       34  *in1 = _mm256_srai_epi32(temp1, cos_bit);  in btf_32_avx2_type0()
       48  *in0 = _mm256_srai_epi32(temp0, cos_bit);  in btf_32_avx2_type1()
       53  *in1 = _mm256_srai_epi32(temp1, cos_bit);  in btf_32_avx2_type1()
       68  *in0 = _mm256_srai_epi32(temp0, cos_bit);  in btf_32_avx2_type0_new()
       73  *in1 = _mm256_srai_epi32(temp1, cos_bit);  in btf_32_avx2_type0_new()
       88  *in0 = _mm256_srai_epi32(temp0, cos_bit);  in btf_32_avx2_type1_new()
       93  *in1 = _mm256_srai_epi32(temp1, cos_bit);  in btf_32_avx2_type1_new()
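Each btf_32_* hit above is the tail of a 32-bit transform butterfly: two inputs are multiplied by a pair of cosine weights, the products are combined, a rounding constant is added, and the sum is shifted right by cos_bit. A sketch of one such stage under those assumptions; the function name, weight handling, and explicit rounding constant are illustrative rather than libaom's exact helper:

    #include <immintrin.h>
    #include <stdint.h>

    /* One 32-bit butterfly stage:
     *   out0 = (w0*a + w1*b + (1 << (cos_bit - 1))) >> cos_bit
     *   out1 = (w1*a - w0*b + (1 << (cos_bit - 1))) >> cos_bit
     * The added constant makes the arithmetic shift round to nearest. */
    static void btf_32_sketch(__m256i *a, __m256i *b, int32_t w0, int32_t w1,
                              int cos_bit) {
      const __m256i vw0 = _mm256_set1_epi32(w0);
      const __m256i vw1 = _mm256_set1_epi32(w1);
      const __m256i rnd = _mm256_set1_epi32(1 << (cos_bit - 1));
      const __m256i t0 = _mm256_add_epi32(
          _mm256_add_epi32(_mm256_mullo_epi32(*a, vw0), _mm256_mullo_epi32(*b, vw1)),
          rnd);
      const __m256i t1 = _mm256_add_epi32(
          _mm256_sub_epi32(_mm256_mullo_epi32(*a, vw1), _mm256_mullo_epi32(*b, vw0)),
          rnd);
      *a = _mm256_srai_epi32(t0, cos_bit);
      *b = _mm256_srai_epi32(t1, cos_bit);
    }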
|
D | highbd_fwd_txfm_avx2.c |
       88  in[0] = _mm256_srai_epi32(in[0], shift);  in col_txfm_8x8_rounding()
       89  in[1] = _mm256_srai_epi32(in[1], shift);  in col_txfm_8x8_rounding()
       90  in[2] = _mm256_srai_epi32(in[2], shift);  in col_txfm_8x8_rounding()
       91  in[3] = _mm256_srai_epi32(in[3], shift);  in col_txfm_8x8_rounding()
       92  in[4] = _mm256_srai_epi32(in[4], shift);  in col_txfm_8x8_rounding()
       93  in[5] = _mm256_srai_epi32(in[5], shift);  in col_txfm_8x8_rounding()
       94  in[6] = _mm256_srai_epi32(in[6], shift);  in col_txfm_8x8_rounding()
       95  in[7] = _mm256_srai_epi32(in[7], shift);  in col_txfm_8x8_rounding()
      189  in[stride * i] = _mm256_srai_epi32(in[stride * i], bit);  in round_shift_32_8xn_avx2()
      221  x = _mm256_srai_epi32(x, bit);  in av1_half_btf_avx2()
      [all …]
|
D | pickrst_avx2.c |
      535  _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift);  in av1_lowbd_pixel_proj_error_avx2()
      537  _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift);  in av1_lowbd_pixel_proj_error_avx2()
      580  _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift);  in av1_lowbd_pixel_proj_error_avx2()
      582  _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift);  in av1_lowbd_pixel_proj_error_avx2()
      921  _mm256_srai_epi32(_mm256_add_epi32(vl, rounding), shift);  in av1_highbd_pixel_proj_error_avx2()
      923  _mm256_srai_epi32(_mm256_add_epi32(vh, rounding), shift);  in av1_highbd_pixel_proj_error_avx2()
      998  _mm256_srai_epi32(_mm256_add_epi32(vh, rounding), shift);  in av1_highbd_pixel_proj_error_avx2()
     1000  _mm256_srai_epi32(_mm256_add_epi32(vl, rounding), shift);  in av1_highbd_pixel_proj_error_avx2()
|
/external/libaom/libaom/av1/common/x86/ |
D | highbd_inv_txfm_avx2.c |
       58  in[0] = _mm256_srai_epi32(in[0], shift);  in round_shift_4x4_avx2()
       59  in[1] = _mm256_srai_epi32(in[1], shift);  in round_shift_4x4_avx2()
       60  in[2] = _mm256_srai_epi32(in[2], shift);  in round_shift_4x4_avx2()
       61  in[3] = _mm256_srai_epi32(in[3], shift);  in round_shift_4x4_avx2()
      247  x = _mm256_srai_epi32(x, bit);  in half_btf_0_avx2()
      260  x = _mm256_srai_epi32(x, bit);  in half_btf_avx2()
      460  x = _mm256_srai_epi32(x, bit);  in idct32_low1_avx2()
     1172  in[0] = _mm256_srai_epi32(in[0], bit);  in idct16_low1_avx2()
     1271  u[0] = _mm256_srai_epi32(u[0], bit);  in idct16_low8_avx2()
     1295  u[5] = _mm256_srai_epi32(u[5], bit);  in idct16_low8_avx2()
      [all …]
|
D | highbd_wiener_convolve_avx2.c |
      118  const __m256i res_even = _mm256_srai_epi32(  in av1_highbd_wiener_convolve_add_src_avx2()
      123  const __m256i res_odd = _mm256_srai_epi32(  in av1_highbd_wiener_convolve_add_src_avx2()
      228  const __m256i res_lo_round = _mm256_srai_epi32(  in av1_highbd_wiener_convolve_add_src_avx2()
      230  const __m256i res_hi_round = _mm256_srai_epi32(  in av1_highbd_wiener_convolve_add_src_avx2()
|
D | selfguided_avx2.c |
      344  __m256i w = _mm256_srai_epi32(_mm256_add_epi32(v, rounding),  in final_filter()
      523  _mm256_srai_epi32(_mm256_add_epi32(v, rounding0),  in final_filter_fast()
      540  _mm256_srai_epi32(_mm256_add_epi32(v, rounding1),  in final_filter_fast()
      696  const __m256i w_0 = _mm256_srai_epi32(  in av1_apply_selfguided_restoration_avx2()
      698  const __m256i w_1 = _mm256_srai_epi32(  in av1_apply_selfguided_restoration_avx2()
|
/external/libaom/libaom/aom_dsp/x86/ |
D | obmc_variance_avx2.c |
       56  const __m256i v_sign_d = _mm256_srai_epi32(v_diff0_d, 31);  in obmc_variance_w8n()
       59  const __m256i v_rdiff0_d = _mm256_srai_epi32(v_tmp_d, 12);  in obmc_variance_w8n()
      119  const __m256i v_sign0_d = _mm256_srai_epi32(v_diff0_d, 31);  in obmc_variance_w16n()
      120  const __m256i v_sign1_d = _mm256_srai_epi32(v_diff1_d, 31);  in obmc_variance_w16n()
      127  const __m256i v_rdiff0_d = _mm256_srai_epi32(v_tmp0_d, 12);  in obmc_variance_w16n()
      128  const __m256i v_rdiff2_d = _mm256_srai_epi32(v_tmp1_d, 12);  in obmc_variance_w16n()
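The obmc_variance hits pair a shift by 31 with a shift by 12: srai by 31 turns each 32-bit lane into 0 (non-negative) or -1 (negative), which can then adjust the rounding bias so the later shift rounds the magnitude instead of always rounding toward minus infinity. A sketch of that signed-rounding idiom with a fixed 12-bit shift; the helper name is hypothetical and the library's exact rounding convention may differ:

    #include <immintrin.h>

    /* Signed rounding shift right by 12:
     *   x >= 0 : (x + 2048) >> 12
     *   x <  0 : (x + 2047) >> 12
     * _mm256_srai_epi32(x, 31) is 0 for non-negative lanes and -1 for negative
     * lanes, so adding it to the bias selects the second form without a branch. */
    static __m256i signed_round_shift_12(__m256i x) {
      const __m256i bias = _mm256_set1_epi32(1 << 11);
      const __m256i sign = _mm256_srai_epi32(x, 31);
      const __m256i biased = _mm256_add_epi32(_mm256_add_epi32(x, bias), sign);
      return _mm256_srai_epi32(biased, 12);
    }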
|
D | txfm_common_avx2.h |
       43  __m256i c0 = _mm256_srai_epi32(a0, cos_bit);  in btf_16_w16_avx2()
       44  __m256i c1 = _mm256_srai_epi32(a1, cos_bit);  in btf_16_w16_avx2()
       45  __m256i d0 = _mm256_srai_epi32(b0, cos_bit);  in btf_16_w16_avx2()
       46  __m256i d1 = _mm256_srai_epi32(b1, cos_bit);  in btf_16_w16_avx2()
      253  return _mm256_srai_epi32(tmp, bit);  in av1_round_shift_32_avx2()
      300  return _mm256_srai_epi32(b, NewSqrt2Bits);  in scale_round_avx2()
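Most of the rounding-related hits in this listing, including av1_round_shift_32_avx2 above, are instances of one add-then-shift idiom: add half of the implicit divisor, then arithmetic-shift right, which rounds a signed fixed-point value to the nearest integer at the reduced precision (the same shape recurs with DCT_CONST_BITS, CONV8_ROUNDING_BITS, and NewSqrt2Bits). A compact sketch of the idiom; the helper name is illustrative:

    #include <immintrin.h>

    /* Round-to-nearest arithmetic shift right of eight signed 32-bit lanes:
     * result = (x + (1 << (bit - 1))) >> bit. */
    static __m256i round_shift_s32(__m256i x, int bit) {
      const __m256i half = _mm256_set1_epi32(1 << (bit - 1));
      return _mm256_srai_epi32(_mm256_add_epi32(x, half), bit);
    }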
|
D | avg_intrin_avx2.c |
      400  b0 = _mm256_srai_epi32(b0, 1);  in aom_highbd_hadamard_16x16_avx2()
      401  b1 = _mm256_srai_epi32(b1, 1);  in aom_highbd_hadamard_16x16_avx2()
      402  b2 = _mm256_srai_epi32(b2, 1);  in aom_highbd_hadamard_16x16_avx2()
      403  b3 = _mm256_srai_epi32(b3, 1);  in aom_highbd_hadamard_16x16_avx2()
      441  b0 = _mm256_srai_epi32(b0, 2);  in aom_highbd_hadamard_32x32_avx2()
      442  b1 = _mm256_srai_epi32(b1, 2);  in aom_highbd_hadamard_32x32_avx2()
      443  b2 = _mm256_srai_epi32(b2, 2);  in aom_highbd_hadamard_32x32_avx2()
      444  b3 = _mm256_srai_epi32(b3, 2);  in aom_highbd_hadamard_32x32_avx2()
|
D | convolve_avx2.h |
      395  d = _mm256_srai_epi32(d, 1);  in add_store_aligned_256()
      414  const __m256i res_lo = _mm256_srai_epi32(wt_res_lo, DIST_PRECISION_BITS);  in comp_avg()
      415  const __m256i res_hi = _mm256_srai_epi32(wt_res_hi, DIST_PRECISION_BITS);  in comp_avg()
      445  res = _mm256_srai_epi32(wt_res, DIST_PRECISION_BITS);  in highbd_comp_avg()
      448  res = _mm256_srai_epi32(wt_res, 1);  in highbd_comp_avg()
      457  const __m256i res_round = _mm256_srai_epi32(  in highbd_convolve_rounding()
|
D | blend_a64_mask_avx2.c |
       40  _mm256_srai_epi32(_mm256_sub_epi32(res0_lo, *v_round_offset), shift);  in blend_a64_d16_mask_w16_avx2()
       42  _mm256_srai_epi32(_mm256_sub_epi32(res0_hi, *v_round_offset), shift);  in blend_a64_d16_mask_w16_avx2()
       68  _mm256_srai_epi32(_mm256_sub_epi32(res0_lo, *v_round_offset), shift);  in blend_a64_d16_mask_w32_avx2()
       70  _mm256_srai_epi32(_mm256_sub_epi32(res0_hi, *v_round_offset), shift);  in blend_a64_d16_mask_w32_avx2()
       72  _mm256_srai_epi32(_mm256_sub_epi32(res1_lo, *v_round_offset), shift);  in blend_a64_d16_mask_w32_avx2()
       74  _mm256_srai_epi32(_mm256_sub_epi32(res1_hi, *v_round_offset), shift);  in blend_a64_d16_mask_w32_avx2()
      942  _mm256_srai_epi32(_mm256_sub_epi32(sumh, *round_offset), shift);  in highbd_blend_a64_d16_mask_w4_avx2()
      944  _mm256_srai_epi32(_mm256_sub_epi32(suml, *round_offset), shift);  in highbd_blend_a64_d16_mask_w4_avx2()
     1074  _mm256_srai_epi32(_mm256_sub_epi32(sumah, *round_offset), shift);  in highbd_blend_a64_d16_mask_w8_avx2()
     1076  _mm256_srai_epi32(_mm256_sub_epi32(sumal, *round_offset), shift);  in highbd_blend_a64_d16_mask_w8_avx2()
      [all …]
|
D | masked_sad_intrin_avx2.c |
      229  pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const),  in highbd_masked_sad8xh_avx2()
      235  pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const),  in highbd_masked_sad8xh_avx2()
      286  pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const),  in highbd_masked_sad16xh_avx2()
      292  pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const),  in highbd_masked_sad16xh_avx2()
|
D | highbd_convolve_avx2.c |
      489  *y = _mm256_srai_epi32(a, CONV8_ROUNDING_BITS);  in filter_8x1_pixels()
      600  _mm256_srai_epi32(_mm256_add_epi32(res, rounding), CONV8_ROUNDING_BITS);  in aom_highbd_filter_block1d4_h4_avx2()
      626  _mm256_srai_epi32(_mm256_add_epi32(res, rounding), CONV8_ROUNDING_BITS);  in aom_highbd_filter_block1d4_h4_avx2()
      676  res_even = _mm256_srai_epi32(_mm256_add_epi32(res_even, rounding),  in aom_highbd_filter_block1d8_h4_avx2()
      684  res_odd = _mm256_srai_epi32(_mm256_add_epi32(res_odd, rounding),  in aom_highbd_filter_block1d8_h4_avx2()
      714  _mm256_srai_epi32(_mm256_add_epi32(res, rounding), CONV8_ROUNDING_BITS);  in aom_highbd_filter_block1d8_h4_avx2()
      796  *y0 = _mm256_srai_epi32(x0, CONV8_ROUNDING_BITS);  in filter_16_2t_pixels()
      797  *y1 = _mm256_srai_epi32(x1, CONV8_ROUNDING_BITS);  in filter_16_2t_pixels()
      805  *y0 = _mm256_srai_epi32(x0, CONV8_ROUNDING_BITS);  in filter_8x1_2t_pixels()
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-avx2-mul32-ld64-x32.c |
       48  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
       49  …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
       50  …= _mm256_add_epi32(_mm256_and_si256(vaccGHIJKLMN, vremainder_mask), _mm256_srai_epi32(vaccGHIJKLMN…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
       51  …= _mm256_add_epi32(_mm256_and_si256(vaccOPQRSTUV, vremainder_mask), _mm256_srai_epi32(vaccOPQRSTUV…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
       78  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
|
D | minmax-avx2-mul32-ld64-x24.c |
       46  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
       47  …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
       48  …= _mm256_add_epi32(_mm256_and_si256(vaccGHIJKLMN, vremainder_mask), _mm256_srai_epi32(vaccGHIJKLMN…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
       74  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
|
D | minmax-avx2-mul32-ld64-x16.c |
       44  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
       45  …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
       66  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
|
D | minmax-avx2-mul32-ld64-x8.c |
       42  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
       61  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-avx2-mul32-ld64-x32.c |
       57  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
       58  …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
       59  …= _mm256_add_epi32(_mm256_and_si256(vaccGHIJKLMN, vremainder_mask), _mm256_srai_epi32(vaccGHIJKLMN…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
       60  …= _mm256_add_epi32(_mm256_and_si256(vaccOPQRSTUV, vremainder_mask), _mm256_srai_epi32(vaccOPQRSTUV…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
       91  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
|
D | minmax-avx2-mul32-ld64-x24.c |
       53  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
       54  …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
       55  …= _mm256_add_epi32(_mm256_and_si256(vaccGHIJKLMN, vremainder_mask), _mm256_srai_epi32(vaccGHIJKLMN…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
       85  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
|
D | minmax-avx2-mul32-ld64-x16.c |
       49  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
       50  …= _mm256_add_epi32(_mm256_and_si256(vacc89ABCDEF, vremainder_mask), _mm256_srai_epi32(vacc89ABCDEF…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
       75  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
|
D | minmax-avx2-mul32-ld64-x8.c |
       45  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
       67  …= _mm256_add_epi32(_mm256_and_si256(vacc01234567, vremainder_mask), _mm256_srai_epi32(vacc01234567…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
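The XNNPACK hits above (the snippets are truncated in this listing) all belong to one step of the qs8 requantization: the accumulator's low bits (the remainder of the upcoming shift) are combined with the sign mask from srai by 31 so that the final shift rounds to nearest with ties away from zero. The generated kernels read the mask, threshold, and shift amount from their params struct; the sketch below derives them from a shift argument and uses a hypothetical helper name:

    #include <immintrin.h>

    /* Rounding arithmetic shift right by `shift`, round to nearest with ties
     * away from zero. The remainder is biased by -1 for negative accumulators
     * (srai by 31 yields 0 or -1), and the compare result (-1 when the
     * remainder exceeds the threshold) is subtracted to add the final +1. */
    static __m256i rounding_shift_right(__m256i acc, int shift) {
      const __m256i mask = _mm256_set1_epi32((1 << shift) - 1);
      const __m256i threshold = _mm256_set1_epi32(((1 << shift) - 1) >> 1);
      const __m256i rem = _mm256_add_epi32(_mm256_and_si256(acc, mask),
                                           _mm256_srai_epi32(acc, 31));
      const __m128i vshift = _mm_cvtsi32_si128(shift);
      return _mm256_sub_epi32(_mm256_sra_epi32(acc, vshift),
                              _mm256_cmpgt_epi32(rem, threshold));
    }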
|
/external/libaom/libaom/aom_dsp/simd/ |
D | v256_intrinsics_x86.h |
      221  _mm256_packs_epi32(_mm256_srai_epi32(b, 16), _mm256_srai_epi32(a, 16)),  in v256_unziphi_16()
      326  return _mm256_srai_epi32(  in v256_unpacklo_s16_s32()
      339  return _mm256_srai_epi32(  in v256_unpackhi_s16_s32()
      694  #define v256_shr_n_s32(a, c) _mm256_srai_epi32(a, c)
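In the v256 wrappers above, srai by 16 finishes a sign extension: an unpack places each 16-bit element in the upper half of a 32-bit lane, and the arithmetic shift then propagates the sign bit down. A sketch of the low-half variant, with an illustrative name (the wrapper's real argument types are the library's v128/v256 aliases); _mm256_cvtepi16_epi32 performs the same conversion from a __m128i source:

    #include <immintrin.h>

    /* Within each 128-bit lane, sign-extend the low four 16-bit elements to
     * 32 bits: interleaving a with itself leaves each element in the upper
     * half of a 32-bit lane, and the arithmetic shift by 16 pulls it back
     * down with the sign bit replicated. */
    static __m256i unpacklo_s16_to_s32(__m256i a) {
      return _mm256_srai_epi32(_mm256_unpacklo_epi16(a, a), 16);
    }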
|
/external/gemmlowp/fixedpoint/ |
D | fixedpoint_avx.h |
      144  return _mm256_srai_epi32(a, offset);
      297  rounded_half_sum = _mm256_srai_epi32(Add(sum, one), 1);
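The second gemmlowp hit is the tail of a rounding half-sum: (a + b + 1) >> 1, where the +1 makes the halving shift round to nearest. A plain-intrinsics sketch of that computation, ignoring any overflow handling the real template may perform around the 32-bit addition:

    #include <immintrin.h>

    /* Rounding average of signed 32-bit lanes: (a + b + 1) >> 1. */
    static __m256i rounding_half_sum_s32(__m256i a, __m256i b) {
      const __m256i one = _mm256_set1_epi32(1);
      const __m256i sum = _mm256_add_epi32(a, b);
      return _mm256_srai_epi32(_mm256_add_epi32(sum, one), 1);
    }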
|