
Searched refs:_mm256_madd_epi16 (Results 1 – 25 of 56) sorted by relevance
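A quick note on the intrinsic these results reference: _mm256_madd_epi16 maps to the AVX2 vpmaddwd instruction. It multiplies the sixteen pairs of signed 16-bit elements in its two operands and adds each adjacent pair of 32-bit products, yielding eight signed 32-bit results (the sum can only overflow when all four contributing inputs are -32768). A scalar model of one call, using a hypothetical helper name purely for illustration:

#include <stdint.h>

/* Scalar model of one _mm256_madd_epi16 (vpmaddwd) call:
 * out[i] = a[2*i]*b[2*i] + a[2*i+1]*b[2*i+1], with the 16-bit inputs
 * sign-extended and the products widened to 32 bits before the add.
 * madd_epi16_model is a hypothetical name used only for illustration. */
static inline void madd_epi16_model(const int16_t a[16], const int16_t b[16],
                                    int32_t out[8]) {
  for (int i = 0; i < 8; ++i) {
    out[i] = (int32_t)a[2 * i] * (int32_t)b[2 * i] +
             (int32_t)a[2 * i + 1] * (int32_t)b[2 * i + 1];
  }
}

The result groups below fall into a few recurring uses of this operation: squaring 16-bit residuals, applying pairs of filter taps, widening horizontal sums against a vector of ones, and accumulating int8 GEMM products.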


/external/libaom/libaom/av1/encoder/x86/
wedge_utils_avx2.c
56 const __m256i v_t0l_d = _mm256_madd_epi16(v_rd0l_w, v_m0l_w); in av1_wedge_sse_from_residuals_avx2()
57 const __m256i v_t0h_d = _mm256_madd_epi16(v_rd0h_w, v_m0h_w); in av1_wedge_sse_from_residuals_avx2()
61 const __m256i v_sq0_d = _mm256_madd_epi16(v_t0_w, v_t0_w); in av1_wedge_sse_from_residuals_avx2()
116 const __m256i v_p0_d = _mm256_madd_epi16(v_d0_w, v_m0_w); in av1_wedge_sign_from_residuals_avx2()
117 const __m256i v_p1_d = _mm256_madd_epi16(v_d1_w, v_m1_w); in av1_wedge_sign_from_residuals_avx2()
118 const __m256i v_p2_d = _mm256_madd_epi16(v_d2_w, v_m2_w); in av1_wedge_sign_from_residuals_avx2()
119 const __m256i v_p3_d = _mm256_madd_epi16(v_d3_w, v_m3_w); in av1_wedge_sign_from_residuals_avx2()
191 const __m256i v_r0l_w = _mm256_madd_epi16(v_ab0l_w, v_abl0n_w); in av1_wedge_compute_delta_squares_avx2()
192 const __m256i v_r0h_w = _mm256_madd_epi16(v_ab0h_w, v_abh0n_w); in av1_wedge_compute_delta_squares_avx2()
193 const __m256i v_r1l_w = _mm256_madd_epi16(v_ab1l_w, v_abl1n_w); in av1_wedge_compute_delta_squares_avx2()
[all …]
error_intrin_avx2.c
47 const __m256i error_lo = _mm256_madd_epi16(diff, diff); in av1_block_error_lp_avx2()
60 const __m256i error = _mm256_madd_epi16(diff, diff); in av1_block_error_lp_avx2()
106 dqcoeff_reg = _mm256_madd_epi16(dqcoeff_reg, dqcoeff_reg); in av1_block_error_avx2()
108 coeff_reg = _mm256_madd_epi16(coeff_reg, coeff_reg); in av1_block_error_avx2()
rdopt_avx2.c
41 const __m256i madd_xy = _mm256_madd_epi16(pixels, slli); in horver_correlation_4x4()
49 const __m256i madd_xz = _mm256_madd_epi16(slli, perm); in horver_correlation_4x4()
54 const __m256i madd1_slli = _mm256_madd_epi16(slli, _mm256_set1_epi16(1)); in horver_correlation_4x4()
58 const __m256i madd_slli = _mm256_madd_epi16(slli, slli); in horver_correlation_4x4()
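The libaom encoder matches above (wedge_utils_avx2.c, error_intrin_avx2.c, rdopt_avx2.c), like the vp9_error, sum_squares and sse results further down, use the instruction as a pairwise square-and-accumulate: madd of a 16-bit residual vector with itself produces eight 32-bit sums of adjacent squares. A minimal sketch of that pattern follows; it is not the libaom code, the helper name is hypothetical, and it assumes a length divisible by 16 with totals small enough for 32-bit per-lane accumulators (the production kernels widen their accumulators to avoid overflow on larger blocks).

#include <immintrin.h>
#include <stdint.h>

/* Hedged sketch of the square-and-accumulate pattern; not the libaom
 * implementation. sum_squares_i16_sketch is a hypothetical helper.
 * Assumes n % 16 == 0 and totals that fit the 32-bit per-lane accumulators. */
static int32_t sum_squares_i16_sketch(const int16_t *src, int n) {
  __m256i acc = _mm256_setzero_si256();
  for (int i = 0; i < n; i += 16) {
    const __m256i v = _mm256_loadu_si256((const __m256i *)(src + i));
    acc = _mm256_add_epi32(acc, _mm256_madd_epi16(v, v)); /* 8 pairwise squares */
  }
  /* Reduce the eight 32-bit partial sums to one scalar. */
  __m128i s = _mm_add_epi32(_mm256_castsi256_si128(acc),
                            _mm256_extracti128_si256(acc, 1));
  s = _mm_add_epi32(s, _mm_srli_si128(s, 8));
  s = _mm_add_epi32(s, _mm_srli_si128(s, 4));
  return _mm_cvtsi128_si32(s);
}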
/external/libaom/libaom/av1/common/x86/
highbd_wiener_convolve_avx2.c
105 const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); in av1_highbd_wiener_convolve_add_src_avx2()
106 const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01); in av1_highbd_wiener_convolve_add_src_avx2()
107 const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); in av1_highbd_wiener_convolve_add_src_avx2()
108 const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23); in av1_highbd_wiener_convolve_add_src_avx2()
109 const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45); in av1_highbd_wiener_convolve_add_src_avx2()
110 const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45); in av1_highbd_wiener_convolve_add_src_avx2()
111 const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67); in av1_highbd_wiener_convolve_add_src_avx2()
112 const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67); in av1_highbd_wiener_convolve_add_src_avx2()
196 const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); in av1_highbd_wiener_convolve_add_src_avx2()
197 const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); in av1_highbd_wiener_convolve_add_src_avx2()
[all …]
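The convolution results here, the convolve_avx2.h helpers further down, and the libvpx FDCT entries that follow use a second idiom: two adjacent filter taps are packed into each 32-bit lane of a coefficient register (the coeffs_01/coeffs_23 and k__cospi_* constants), and the input samples are interleaved so that a single madd applies both taps per output lane. Below is a minimal two-tap vertical-filter sketch of that data layout, with a hypothetical helper name and no claim of matching the libaom kernels' register choreography.

#include <immintrin.h>
#include <stdint.h>

/* Hedged sketch of the tap-pair layout: each 32-bit lane of `taps`
 * holds (f0, f1), and unpacklo/unpackhi interleave the two input rows
 * so one _mm256_madd_epi16 computes f0*row0[j] + f1*row1[j].
 * vert_2tap_16px is a hypothetical helper for 16 int16 pixels. */
static void vert_2tap_16px(const int16_t *row0, const int16_t *row1,
                           int16_t f0, int16_t f1, int32_t out[16]) {
  const __m256i r0 = _mm256_loadu_si256((const __m256i *)row0);
  const __m256i r1 = _mm256_loadu_si256((const __m256i *)row1);
  const __m256i taps =
      _mm256_set1_epi32(((int32_t)(uint16_t)f1 << 16) | (uint16_t)f0);
  const __m256i lo = _mm256_unpacklo_epi16(r0, r1);  /* pixels 0-3, 8-11 */
  const __m256i hi = _mm256_unpackhi_epi16(r0, r1);  /* pixels 4-7, 12-15 */
  const __m256i res_lo = _mm256_madd_epi16(lo, taps);
  const __m256i res_hi = _mm256_madd_epi16(hi, taps);
  /* Undo the per-128-bit-lane interleaving before storing. */
  _mm256_storeu_si256((__m256i *)out,
                      _mm256_permute2x128_si256(res_lo, res_hi, 0x20));
  _mm256_storeu_si256((__m256i *)(out + 8),
                      _mm256_permute2x128_si256(res_lo, res_hi, 0x31));
}

An 8-tap filter repeats this with four tap-pair registers and sums the four madd results, which is what the res_0 … res_7 lines with coeffs_01 … coeffs_67 above correspond to.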
/external/libvpx/libvpx/vpx_dsp/x86/
fwd_dct32x32_impl_avx2.h
365 const __m256i s2_20_2 = _mm256_madd_epi16(s2_20_0, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
366 const __m256i s2_20_3 = _mm256_madd_epi16(s2_20_1, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
367 const __m256i s2_21_2 = _mm256_madd_epi16(s2_21_0, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
368 const __m256i s2_21_3 = _mm256_madd_epi16(s2_21_1, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
369 const __m256i s2_22_2 = _mm256_madd_epi16(s2_22_0, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
370 const __m256i s2_22_3 = _mm256_madd_epi16(s2_22_1, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
371 const __m256i s2_23_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
372 const __m256i s2_23_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
373 const __m256i s2_24_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_p16); in FDCT32x32_2D_AVX2()
374 const __m256i s2_24_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_p16); in FDCT32x32_2D_AVX2()
[all …]
/external/flac/src/libFLAC/
lpc_intrin_avx2.c
80 summ = _mm256_madd_epi16(q11, _mm256_loadu_si256((const __m256i*)(data+i-12))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
81 …mull = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
82 …mull = _mm256_madd_epi16(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
83 …mull = _mm256_madd_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
84 …mull = _mm256_madd_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
85 …mull = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
86 …mull = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
87 …mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
88 …mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
89 …mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
[all …]
/external/libvpx/libvpx/vp9/encoder/x86/
vp9_error_avx2.c
37 dqcoeff_256 = _mm256_madd_epi16(dqcoeff_256, dqcoeff_256); in vp9_block_error_avx2()
39 coeff_256 = _mm256_madd_epi16(coeff_256, coeff_256); in vp9_block_error_avx2()
66 dqcoeff_0 = _mm256_madd_epi16(dqcoeff_0, dqcoeff_0); in vp9_block_error_avx2()
67 dqcoeff_1 = _mm256_madd_epi16(dqcoeff_1, dqcoeff_1); in vp9_block_error_avx2()
69 coeff_0 = _mm256_madd_epi16(coeff_0, coeff_0); in vp9_block_error_avx2()
70 coeff_1 = _mm256_madd_epi16(coeff_1, coeff_1); in vp9_block_error_avx2()
125 const __m256i error_lo = _mm256_madd_epi16(diff, diff); in vp9_block_error_fp_avx2()
138 const __m256i error = _mm256_madd_epi16(diff, diff); in vp9_block_error_fp_avx2()
/external/libaom/libaom/aom_dsp/x86/
sum_squares_avx2.c
38 const __m256i v_sq_0_d = _mm256_madd_epi16(v_val_0_w, v_val_0_w); in aom_sum_squares_2d_i16_nxn_avx2()
39 const __m256i v_sq_1_d = _mm256_madd_epi16(v_val_1_w, v_val_1_w); in aom_sum_squares_2d_i16_nxn_avx2()
40 const __m256i v_sq_2_d = _mm256_madd_epi16(v_val_2_w, v_val_2_w); in aom_sum_squares_2d_i16_nxn_avx2()
41 const __m256i v_sq_3_d = _mm256_madd_epi16(v_val_3_w, v_val_3_w); in aom_sum_squares_2d_i16_nxn_avx2()
132 __m256i vsqs0 = _mm256_madd_epi16(vsrc0, vsrc0); in aom_var_2d_u8_avx2()
133 __m256i vsqs1 = _mm256_madd_epi16(vsrc1, vsrc1); in aom_var_2d_u8_avx2()
153 __m256i vsqs0 = _mm256_madd_epi16(vsrc0, vsrc0); in aom_var_2d_u8_avx2()
154 __m256i vsqs1 = _mm256_madd_epi16(vsrc1, vsrc1); in aom_var_2d_u8_avx2()
206 __m256i vsqs0 = _mm256_madd_epi16(vsrc[k], vsrc[k]); in aom_var_2d_u16_avx2()
225 __m256i vsqs0 = _mm256_madd_epi16(vsrc, vsrc); in aom_var_2d_u16_avx2()
blk_sse_sum_avx2.c
68 row_sum_buffer = _mm256_madd_epi16(load_pixels, one); in sse_sum_wd4_avx2()
69 row_sse_buffer = _mm256_madd_epi16(load_pixels, load_pixels); in sse_sum_wd4_avx2()
104 row_sum_buffer = _mm256_madd_epi16(load_pixels, one); in sse_sum_wd8_avx2()
105 row_sse_buffer = _mm256_madd_epi16(load_pixels, load_pixels); in sse_sum_wd8_avx2()
137 row_sum_buffer = _mm256_madd_epi16(load_pixels, one); in sse_sum_wd16_avx2()
138 row_sse_buffer = _mm256_madd_epi16(load_pixels, load_pixels); in sse_sum_wd16_avx2()
sse_avx2.c
31 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d00_w, v_d00_w)); in sse_w32_avx2()
32 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d01_w, v_d01_w)); in sse_w32_avx2()
86 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in sse_w4x4_avx2()
97 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in sse_w8x2_avx2()
141 _mm256_add_epi32(_mm256_madd_epi16(v_asub, v_asub), in aom_sse_avx2()
142 _mm256_madd_epi16(v_bsub, v_bsub)); in aom_sse_avx2()
222 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in highbd_sse_w16_avx2()
241 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in highbd_sse_w4x4_avx2()
250 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in highbd_sse_w8x2_avx2()
obmc_variance_avx2.c
53 const __m256i v_pm_d = _mm256_madd_epi16(v_p0_d, v_m_d); in obmc_variance_w8n()
113 const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d); in obmc_variance_w16n()
114 const __m256i v_pm1_d = _mm256_madd_epi16(v_p1_d, v_m1_d); in obmc_variance_w16n()
132 const __m256i v_sqrdiff_d = _mm256_madd_epi16(v_rdiff01_w, v_rdiff01_w); in obmc_variance_w16n()
convolve_avx2.h
344 const __m256i res_0 = _mm256_madd_epi16(s[0], coeffs[0]); in convolve()
345 const __m256i res_1 = _mm256_madd_epi16(s[1], coeffs[1]); in convolve()
346 const __m256i res_2 = _mm256_madd_epi16(s[2], coeffs[2]); in convolve()
347 const __m256i res_3 = _mm256_madd_epi16(s[3], coeffs[3]); in convolve()
357 const __m256i res_1 = _mm256_madd_epi16(s[0], coeffs[0]); in convolve_4tap()
358 const __m256i res_2 = _mm256_madd_epi16(s[1], coeffs[1]); in convolve_4tap()
411 const __m256i wt_res_lo = _mm256_madd_epi16(data_lo, *wt); in comp_avg()
412 const __m256i wt_res_hi = _mm256_madd_epi16(data_hi, *wt); in comp_avg()
obmc_sad_avx2.c
49 const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d); in obmc_sad_w4_avx2()
90 const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d); in obmc_sad_w8n_avx2()
171 const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d); in hbd_obmc_sad_w4_avx2()
216 const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d); in hbd_obmc_sad_w8n_avx2()
highbd_variance_avx2.c
36 const __m256i v_sqrdiff = _mm256_madd_epi16(v_diff, v_diff); in aom_highbd_calc8x8var_avx2()
66 const __m256i v_sqrdiff = _mm256_madd_epi16(v_diff, v_diff); in aom_highbd_calc16x16var_avx2()
72 __m256i v_sum0 = _mm256_madd_epi16(v_sum_d, one); in aom_highbd_calc16x16var_avx2()
masked_sad_intrin_avx2.c
228 __m256i pred_l = _mm256_madd_epi16(data_l, mask_l); in highbd_masked_sad8xh_avx2()
234 __m256i pred_r = _mm256_madd_epi16(data_r, mask_r); in highbd_masked_sad8xh_avx2()
245 res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one)); in highbd_masked_sad8xh_avx2()
285 __m256i pred_l = _mm256_madd_epi16(data_l, mask_l); in highbd_masked_sad16xh_avx2()
291 __m256i pred_r = _mm256_madd_epi16(data_r, mask_r); in highbd_masked_sad16xh_avx2()
302 res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one)); in highbd_masked_sad16xh_avx2()
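Several of the aom_dsp matches above (blk_sse_sum_avx2.c, highbd_variance_avx2.c, masked_sad_intrin_avx2.c) and the gemmlowp pack routine at the end of this page multiply by a vector of ones: madd against all-1s is simply a widening pairwise horizontal add, turning sixteen int16 values into eight int32 sums. A one-line sketch, with a hypothetical helper name:

#include <immintrin.h>

/* Hedged sketch of the "multiply by ones" idiom: madd against a vector
 * of 1s pairwise-adds sixteen int16 values into eight int32 lanes,
 * i.e. a widening horizontal add. pairwise_widen_sum is hypothetical. */
static __m256i pairwise_widen_sum(__m256i v16 /* 16 x int16 */) {
  const __m256i one = _mm256_set1_epi16(1);
  return _mm256_madd_epi16(v16, one); /* 8 x int32: v16[2i] + v16[2i+1] */
}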
/external/XNNPACK/src/qs8-igemm/gen/
3x8c8-minmax-avx2.c
109 vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
110 vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
111 vacc2x01 = _mm256_add_epi32(vacc2x01, _mm256_madd_epi16(vxa2, vxb01)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
115 vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
116 vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
117 vacc2x23 = _mm256_add_epi32(vacc2x23, _mm256_madd_epi16(vxa2, vxb23)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
121 vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
122 vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
123 vacc2x45 = _mm256_add_epi32(vacc2x45, _mm256_madd_epi16(vxa2, vxb45)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
127 vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
[all …]
2x8c8-minmax-avx2.c
94 vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
95 vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
99 vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
100 vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
104 vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
105 vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
109 vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
110 vacc1x67 = _mm256_add_epi32(vacc1x67, _mm256_madd_epi16(vxa1, vxb67)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
1x8c8-minmax-avx2.c
79 vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
83 vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
87 vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
91 vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
/external/XNNPACK/src/qs8-gemm/gen/
3x8c8-minmax-avx2.c
94 vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
95 vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
96 vacc2x01 = _mm256_add_epi32(vacc2x01, _mm256_madd_epi16(vxa2, vxb01)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
100 vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
101 vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
102 vacc2x23 = _mm256_add_epi32(vacc2x23, _mm256_madd_epi16(vxa2, vxb23)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
106 vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
107 vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
108 vacc2x45 = _mm256_add_epi32(vacc2x45, _mm256_madd_epi16(vxa2, vxb45)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
112 vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
[all …]
3x8c8-xw-minmax-avx2.c
93 vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
94 vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
95 vacc2x01 = _mm256_add_epi32(vacc2x01, _mm256_madd_epi16(vxa2, vxb01)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
98 vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
99 vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
100 vacc2x23 = _mm256_add_epi32(vacc2x23, _mm256_madd_epi16(vxa2, vxb23)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
103 vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
104 vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
105 vacc2x45 = _mm256_add_epi32(vacc2x45, _mm256_madd_epi16(vxa2, vxb45)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
108 vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67)); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
[all …]
2x8c8-minmax-avx2.c
81 vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
82 vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
86 vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
87 vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
91 vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
92 vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
96 vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
97 vacc1x67 = _mm256_add_epi32(vacc1x67, _mm256_madd_epi16(vxa1, vxb67)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
2x8c8-xw-minmax-avx2.c
80 vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01)); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
81 vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01)); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
84 vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23)); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
85 vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23)); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
88 vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45)); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
89 vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45)); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
92 vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67)); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
93 vacc1x67 = _mm256_add_epi32(vacc1x67, _mm256_madd_epi16(vxa1, vxb67)); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
1x8c8-minmax-avx2.c
68 vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
72 vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
76 vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
80 vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
1x8c8-xw-minmax-avx2.c
67 vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01)); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
70 vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23)); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
73 vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45)); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
76 vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67)); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
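The XNNPACK qs8 GEMM/IGEMM microkernels above all follow the same accumulation step: signed 8-bit operands are widened to 16 bits (the vxa*/vxb* registers), madd forms and pairwise-sums the 16x16-bit products, and the result is added into 32-bit accumulators. A hedged sketch of one such step; the helper name is hypothetical and the real kernels pack several B columns per register.

#include <immintrin.h>
#include <stdint.h>

/* Hedged sketch of one accumulation step in the qs8 microkernels:
 * 16 signed 8-bit values from A and B are widened to 16 bits, multiplied
 * pairwise and summed into eight 32-bit lanes, then added to the running
 * accumulator. qs8_madd_step is a hypothetical helper. */
static __m256i qs8_madd_step(__m256i acc, const int8_t *a, const int8_t *b) {
  const __m256i vxa = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i *)a));
  const __m256i vxb = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i *)b));
  return _mm256_add_epi32(acc, _mm256_madd_epi16(vxa, vxb));
}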
/external/gemmlowp/internal/
pack_avx.h
139 ymm7 = _mm256_madd_epi16(ymm6, one); in Pack()
145 ymm7 = _mm256_madd_epi16(ymm6, one); in Pack()
149 ymm7 = _mm256_madd_epi16(ymm6, one); in Pack()
153 ymm7 = _mm256_madd_epi16(ymm6, one); in Pack()
157 ymm7 = _mm256_madd_epi16(ymm6, one); in Pack()
161 ymm7 = _mm256_madd_epi16(ymm6, one); in Pack()
165 ymm7 = _mm256_madd_epi16(ymm6, one); in Pack()
169 ymm7 = _mm256_madd_epi16(ymm6, one); in Pack()
