
Searched refs: _mm256_sub_epi32 (Results 1 – 25 of 63) sorted by relevance


/external/libaom/libaom/av1/encoder/x86/
highbd_fwd_txfm_avx2.c
234 out1 = _mm256_sub_epi32(in0_w1, in1_w0); \
247 out1 = _mm256_sub_epi32(in0_w1, in1_w0); \
270 v[7] = _mm256_sub_epi32(in[0 * col_num + col], in[7 * col_num + col]); in fdct8_avx2()
272 u[6] = _mm256_sub_epi32(in[1 * col_num + col], in[6 * col_num + col]); in fdct8_avx2()
274 u[5] = _mm256_sub_epi32(in[2 * col_num + col], in[5 * col_num + col]); in fdct8_avx2()
276 v[4] = _mm256_sub_epi32(in[3 * col_num + col], in[4 * col_num + col]); in fdct8_avx2()
278 v[3] = _mm256_sub_epi32(u[0], u[3]); in fdct8_avx2()
280 v[2] = _mm256_sub_epi32(u[1], u[2]); in fdct8_avx2()
290 v[6] = _mm256_sub_epi32(u[0], v[6]); in fdct8_avx2()
302 u[1] = _mm256_sub_epi32(v[0], v[1]); in fdct8_avx2()
[all …]
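
These fdct8_avx2 lines are the subtract half of a butterfly stage: each stage of the forward DCT pairs an input with its mirror and keeps both the sum and the difference. A minimal sketch of the pattern (names are illustrative, not libaom's):

#include <immintrin.h>

/* One butterfly over eight 32-bit lanes:
   sum[i] = a[i] + b[i], diff[i] = a[i] - b[i]. */
static inline void butterfly_epi32(__m256i a, __m256i b,
                                   __m256i *sum, __m256i *diff) {
  *sum  = _mm256_add_epi32(a, b);
  *diff = _mm256_sub_epi32(a, b);
}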
av1_fwd_txfm_avx2.h
32 __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0); in btf_32_avx2_type0()
51 __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1); in btf_32_avx2_type1()
71 __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0); in btf_32_avx2_type0_new()
91 __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1); in btf_32_avx2_type1_new()
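
The type0/type1 butterflies above differ only in the operand order of the subtraction, which flips the sign of the rotated output. A hedged sketch of the type0 rotation; the rounding constant and the shift amount follow the usual AV1 cos_bit convention but are assumptions here:

#include <immintrin.h>

/* out0 = (in0*w0 + in1*w1 + r) >> bit,
   out1 = (in0*w1 - in1*w0 + r) >> bit.
   type1 swaps the subtraction to in1*w0 - in0*w1. */
static inline void btf32_type0_sketch(__m256i w0, __m256i w1,
                                      __m256i in0, __m256i in1,
                                      __m256i rounding, int bit,
                                      __m256i *out0, __m256i *out1) {
  const __m256i in0_w0 = _mm256_mullo_epi32(in0, w0);
  const __m256i in1_w1 = _mm256_mullo_epi32(in1, w1);
  const __m256i in0_w1 = _mm256_mullo_epi32(in0, w1);
  const __m256i in1_w0 = _mm256_mullo_epi32(in1, w0);
  const __m128i vbit = _mm_cvtsi32_si128(bit);
  const __m256i t0 = _mm256_add_epi32(_mm256_add_epi32(in0_w0, in1_w1), rounding);
  const __m256i t1 = _mm256_add_epi32(_mm256_sub_epi32(in0_w1, in1_w0), rounding);
  *out0 = _mm256_sra_epi32(t0, vbit);
  *out1 = _mm256_sra_epi32(t1, vbit);
}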
highbd_block_error_intrin_avx2.c
33 __m256i diff1 = _mm256_sub_epi32(mm256_coeff, mm256_dqcoeff); in av1_highbd_block_error_avx2()
34 __m256i diff2 = _mm256_sub_epi32(mm256_coeff2, mm256_dqcoeff2); in av1_highbd_block_error_avx2()
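
In av1_highbd_block_error_avx2 the subtraction forms coeff - dqcoeff per 32-bit lane before the squared error is accumulated. A minimal sketch of that step; the real kernel widens to 64 bits before summing, which this per-lane mullo does not:

#include <immintrin.h>
#include <stdint.h>

/* Per-lane (coeff - dqcoeff)^2 for eight coefficients (low 32 bits
   only; a real error kernel widens to 64-bit before accumulating). */
static inline __m256i sq_diff_epi32(const int32_t *coeff,
                                    const int32_t *dqcoeff) {
  const __m256i c    = _mm256_loadu_si256((const __m256i *)coeff);
  const __m256i dq   = _mm256_loadu_si256((const __m256i *)dqcoeff);
  const __m256i diff = _mm256_sub_epi32(c, dq);
  return _mm256_mullo_epi32(diff, diff);
}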
pickrst_avx2.c
656 s = _mm256_sub_epi32(s, d); in calc_proj_params_r0_r1_avx2()
657 f1 = _mm256_sub_epi32(f1, d); in calc_proj_params_r0_r1_avx2()
658 f2 = _mm256_sub_epi32(f2, d); in calc_proj_params_r0_r1_avx2()
750 s = _mm256_sub_epi32(s, d); in calc_proj_params_r0_avx2()
751 f1 = _mm256_sub_epi32(f1, d); in calc_proj_params_r0_avx2()
809 s = _mm256_sub_epi32(s, d); in calc_proj_params_r1_avx2()
810 f2 = _mm256_sub_epi32(f2, d); in calc_proj_params_r1_avx2()
904 const __m256i flt0l_subu = _mm256_sub_epi32(flt0l, u0l); in av1_highbd_pixel_proj_error_avx2()
905 const __m256i flt0h_subu = _mm256_sub_epi32(flt0h, u0h); in av1_highbd_pixel_proj_error_avx2()
906 const __m256i flt1l_subu = _mm256_sub_epi32(flt1l, u0l); in av1_highbd_pixel_proj_error_avx2()
[all …]
/external/libvpx/libvpx/vpx_dsp/x86/
avg_intrin_avx2.c
30 __m256i b1 = _mm256_sub_epi32(a0, a1); in highbd_hadamard_col8_avx2()
32 __m256i b3 = _mm256_sub_epi32(a2, a3); in highbd_hadamard_col8_avx2()
34 __m256i b5 = _mm256_sub_epi32(a4, a5); in highbd_hadamard_col8_avx2()
36 __m256i b7 = _mm256_sub_epi32(a6, a7); in highbd_hadamard_col8_avx2()
40 a2 = _mm256_sub_epi32(b0, b2); in highbd_hadamard_col8_avx2()
41 a3 = _mm256_sub_epi32(b1, b3); in highbd_hadamard_col8_avx2()
44 a6 = _mm256_sub_epi32(b4, b6); in highbd_hadamard_col8_avx2()
45 a7 = _mm256_sub_epi32(b5, b7); in highbd_hadamard_col8_avx2()
52 b2 = _mm256_sub_epi32(a0, a4); in highbd_hadamard_col8_avx2()
53 b6 = _mm256_sub_epi32(a1, a5); in highbd_hadamard_col8_avx2()
[all …]
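
highbd_hadamard_col8_avx2 is three rounds of the same add/sub butterfly with strides 1, 2 and 4, i.e. an 8-point Hadamard transform. A compact in-place sketch (illustrative, not the library code):

#include <immintrin.h>

/* 8-point Hadamard over 32-bit lanes: three butterfly rounds. */
static inline void hadamard8_epi32_sketch(__m256i v[8]) {
  for (int stride = 1; stride < 8; stride <<= 1) {
    for (int i = 0; i < 8; i += 2 * stride) {
      for (int j = i; j < i + stride; ++j) {
        const __m256i a = v[j], b = v[j + stride];
        v[j]          = _mm256_add_epi32(a, b);
        v[j + stride] = _mm256_sub_epi32(a, b);
      }
    }
  }
}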
fwd_dct32x32_impl_avx2.h
1524 lstep1[4] = _mm256_sub_epi32(lstep3[2], lstep3[4]); in FDCT32x32_2D_AVX2()
1525 lstep1[5] = _mm256_sub_epi32(lstep3[3], lstep3[5]); in FDCT32x32_2D_AVX2()
1526 lstep1[6] = _mm256_sub_epi32(lstep3[0], lstep3[6]); in FDCT32x32_2D_AVX2()
1527 lstep1[7] = _mm256_sub_epi32(lstep3[1], lstep3[7]); in FDCT32x32_2D_AVX2()
1532 lstep1[20] = _mm256_sub_epi32(lstep2[18], lstep3[20]); in FDCT32x32_2D_AVX2()
1533 lstep1[21] = _mm256_sub_epi32(lstep2[19], lstep3[21]); in FDCT32x32_2D_AVX2()
1534 lstep1[22] = _mm256_sub_epi32(lstep2[16], lstep3[22]); in FDCT32x32_2D_AVX2()
1535 lstep1[23] = _mm256_sub_epi32(lstep2[17], lstep3[23]); in FDCT32x32_2D_AVX2()
1536 lstep1[24] = _mm256_sub_epi32(lstep2[30], lstep3[24]); in FDCT32x32_2D_AVX2()
1537 lstep1[25] = _mm256_sub_epi32(lstep2[31], lstep3[25]); in FDCT32x32_2D_AVX2()
[all …]
/external/libaom/libaom/aom_dsp/x86/
avg_intrin_avx2.c
271 __m256i b1 = _mm256_sub_epi32(a0, a1); in highbd_hadamard_col8_avx2()
273 __m256i b3 = _mm256_sub_epi32(a2, a3); in highbd_hadamard_col8_avx2()
275 __m256i b5 = _mm256_sub_epi32(a4, a5); in highbd_hadamard_col8_avx2()
277 __m256i b7 = _mm256_sub_epi32(a6, a7); in highbd_hadamard_col8_avx2()
281 a2 = _mm256_sub_epi32(b0, b2); in highbd_hadamard_col8_avx2()
282 a3 = _mm256_sub_epi32(b1, b3); in highbd_hadamard_col8_avx2()
285 a6 = _mm256_sub_epi32(b4, b6); in highbd_hadamard_col8_avx2()
286 a7 = _mm256_sub_epi32(b5, b7); in highbd_hadamard_col8_avx2()
293 b2 = _mm256_sub_epi32(a0, a4); in highbd_hadamard_col8_avx2()
294 b6 = _mm256_sub_epi32(a1, a5); in highbd_hadamard_col8_avx2()
[all …]
obmc_sad_avx2.c
51 const __m256i v_diff_d = _mm256_sub_epi32(v_w_d, v_pm_d); in obmc_sad_w4_avx2()
92 const __m256i v_diff0_d = _mm256_sub_epi32(v_w0_d, v_pm0_d); in obmc_sad_w8n_avx2()
173 const __m256i v_diff_d = _mm256_sub_epi32(v_w_d, v_pm_d); in hbd_obmc_sad_w4_avx2()
218 const __m256i v_diff0_d = _mm256_sub_epi32(v_w0_d, v_pm0_d); in hbd_obmc_sad_w8n_avx2()
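
In the OBMC SAD kernels the difference is taken between the weighted source (v_w_d) and the mask-weighted prediction (v_pm_d); the absolute difference is then accumulated. A hedged sketch of the inner step, with the weighted product's rounding omitted:

#include <immintrin.h>

/* d = wsrc - mask*pred per lane; SAD accumulates |d|.
   The rounding of mask*pred in the real kernels is omitted here. */
static inline __m256i obmc_diff_sketch(__m256i v_w_d, __m256i v_m_d,
                                       __m256i v_p_d) {
  const __m256i v_pm_d   = _mm256_mullo_epi32(v_m_d, v_p_d);
  const __m256i v_diff_d = _mm256_sub_epi32(v_w_d, v_pm_d);
  return _mm256_abs_epi32(v_diff_d);
}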
obmc_variance_avx2.c
54 const __m256i v_diff0_d = _mm256_sub_epi32(v_w_d, v_pm_d); in obmc_variance_w8n()
116 const __m256i v_diff0_d = _mm256_sub_epi32(v_w0_d, v_pm0_d); in obmc_variance_w16n()
117 const __m256i v_diff1_d = _mm256_sub_epi32(v_w1_d, v_pm1_d); in obmc_variance_w16n()
blend_a64_mask_avx2.c
40 _mm256_srai_epi32(_mm256_sub_epi32(res0_lo, *v_round_offset), shift); in blend_a64_d16_mask_w16_avx2()
42 _mm256_srai_epi32(_mm256_sub_epi32(res0_hi, *v_round_offset), shift); in blend_a64_d16_mask_w16_avx2()
68 _mm256_srai_epi32(_mm256_sub_epi32(res0_lo, *v_round_offset), shift); in blend_a64_d16_mask_w32_avx2()
70 _mm256_srai_epi32(_mm256_sub_epi32(res0_hi, *v_round_offset), shift); in blend_a64_d16_mask_w32_avx2()
72 _mm256_srai_epi32(_mm256_sub_epi32(res1_lo, *v_round_offset), shift); in blend_a64_d16_mask_w32_avx2()
74 _mm256_srai_epi32(_mm256_sub_epi32(res1_hi, *v_round_offset), shift); in blend_a64_d16_mask_w32_avx2()
942 _mm256_srai_epi32(_mm256_sub_epi32(sumh, *round_offset), shift); in highbd_blend_a64_d16_mask_w4_avx2()
944 _mm256_srai_epi32(_mm256_sub_epi32(suml, *round_offset), shift); in highbd_blend_a64_d16_mask_w4_avx2()
1074 _mm256_srai_epi32(_mm256_sub_epi32(sumah, *round_offset), shift); in highbd_blend_a64_d16_mask_w8_avx2()
1076 _mm256_srai_epi32(_mm256_sub_epi32(sumal, *round_offset), shift); in highbd_blend_a64_d16_mask_w8_avx2()
[all …]
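
Every blend line above is the same descale pair: subtract a precomputed round offset, then arithmetic-shift right to drop the intermediate precision. A sketch (the register-count shift form is used so `shift` may be a runtime variable):

#include <immintrin.h>

/* (sum - round_offset) >> shift, arithmetic. */
static inline __m256i descale_sketch(__m256i sum, __m256i round_offset,
                                     int shift) {
  const __m256i d = _mm256_sub_epi32(sum, round_offset);
  return _mm256_sra_epi32(d, _mm_cvtsi32_si128(shift));
}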
/external/XNNPACK/src/qs8-vaddc/gen/
minmax-avx2-mul32-ld64-x32.c
53 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
54 …vacc89ABCDEF = _mm256_sub_epi32(_mm256_sra_epi32(vacc89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89A… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
55 …vaccGHIJKLMN = _mm256_sub_epi32(_mm256_sra_epi32(vaccGHIJKLMN, vshift), _mm256_cmpgt_epi32(vremGHI… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
56 …vaccOPQRSTUV = _mm256_sub_epi32(_mm256_sra_epi32(vaccOPQRSTUV, vshift), _mm256_cmpgt_epi32(vremOPQ… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
80 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
minmax-avx2-mul32-ld64-x24.c
50 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
51 …vacc89ABCDEF = _mm256_sub_epi32(_mm256_sra_epi32(vacc89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89A… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
52 …vaccGHIJKLMN = _mm256_sub_epi32(_mm256_sra_epi32(vaccGHIJKLMN, vshift), _mm256_cmpgt_epi32(vremGHI… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
76 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
minmax-avx2-mul32-ld64-x16.c
47 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
48 …vacc89ABCDEF = _mm256_sub_epi32(_mm256_sra_epi32(vacc89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89A… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
68 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
minmax-avx2-mul32-ld64-x8.c
44 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
63 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
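
These XNNPACK requantization lines lean on _mm256_cmpgt_epi32 returning all-ones (-1) where the compare holds: subtracting that mask increments exactly the lanes whose remainder exceeds the rounding threshold, turning a truncating arithmetic shift into a rounding one. Sketch:

#include <immintrin.h>

/* result = (acc >> shift) + (rem > threshold ? 1 : 0);
   cmpgt yields -1 where true, so subtracting the mask adds 1. */
static inline __m256i rounding_shift_sketch(__m256i vacc, __m256i vrem,
                                            __m256i vthreshold,
                                            __m128i vshift) {
  return _mm256_sub_epi32(_mm256_sra_epi32(vacc, vshift),
                          _mm256_cmpgt_epi32(vrem, vthreshold));
}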
/external/libaom/libaom/av1/common/x86/
selfguided_avx2.c
123 _mm256_extract_epi32(_mm256_sub_epi32(row1, above1), 7)); in integral_images()
125 _mm256_extract_epi32(_mm256_sub_epi32(row2, above2), 7)); in integral_images()
171 _mm256_extract_epi32(_mm256_sub_epi32(row1, above1), 7)); in integral_images_highbd()
173 _mm256_extract_epi32(_mm256_sub_epi32(row2, above2), 7)); in integral_images_highbd()
185 const __m256i u = _mm256_sub_epi32(tr, tl); in boxsum_from_ii()
186 const __m256i v = _mm256_sub_epi32(br, bl); in boxsum_from_ii()
187 return _mm256_sub_epi32(v, u); in boxsum_from_ii()
213 return _mm256_sub_epi32(an, bb); in compute_p()
271 _mm256_sub_epi32(_mm256_set1_epi32(SGRPROJ_SGR), a_res); in calc_ab()
318 return _mm256_sub_epi32(_mm256_slli_epi32(_mm256_add_epi32(fours, threes), 2), in cross_sum()
[all …]
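
boxsum_from_ii above is the standard four-corner lookup on an integral image: the sum over a box is br - bl - tr + tl, computed as two differences and a final subtract:

#include <immintrin.h>

/* Box sum from four integral-image corners:
   (br - bl) - (tr - tl) = br - bl - tr + tl. */
static inline __m256i boxsum_sketch(__m256i tl, __m256i tr,
                                    __m256i bl, __m256i br) {
  const __m256i u = _mm256_sub_epi32(tr, tl);
  const __m256i v = _mm256_sub_epi32(br, bl);
  return _mm256_sub_epi32(v, u);
}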
highbd_inv_txfm_avx2.c
146 __m256i a1 = _mm256_sub_epi32(offset, in1); in neg_shift_avx2()
268 __m256i a1 = _mm256_sub_epi32(in0, in1); in addsub_avx2()
1293 u[5] = _mm256_sub_epi32(y, x); in idct16_low8_avx2()
1314 u[10] = _mm256_sub_epi32(y, x); in idct16_low8_avx2()
1324 u[11] = _mm256_sub_epi32(y, x); in idct16_low8_avx2()
1445 v[1] = _mm256_sub_epi32(x, y); in idct16_avx2()
1469 u[5] = _mm256_sub_epi32(y, x); in idct16_avx2()
1493 v[10] = _mm256_sub_epi32(y, x); in idct16_avx2()
1503 v[11] = _mm256_sub_epi32(y, x); in idct16_avx2()
1560 v[1] = _mm256_sub_epi32(zero, x); in iadst16_low1_avx2()
[all …]
/external/ruy/ruy/
kernel_avx2_fma.cc
190 _mm256_sub_epi32(initial_accum_data, lhs_sums_offset);
195 accum_data_v0 = _mm256_sub_epi32(
197 accum_data_v1 = _mm256_sub_epi32(
199 accum_data_v2 = _mm256_sub_epi32(
201 accum_data_v3 = _mm256_sub_epi32(
203 accum_data_v4 = _mm256_sub_epi32(
205 accum_data_v5 = _mm256_sub_epi32(
207 accum_data_v6 = _mm256_sub_epi32(
209 accum_data_v7 = _mm256_sub_epi32(
361 const __m256i neg_e_vector = _mm256_sub_epi32(zero_vector, e_vector);
[all …]
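
ruy uses the same intrinsic for two jobs here: folding the zero-point correction (the lhs_sums_offset) out of the accumulators, and negating a vector by subtracting it from zero, since AVX2 has no integer negate instruction. The negation idiom:

#include <immintrin.h>

/* AVX2 has no vpneg; 0 - x is the standard idiom. */
static inline __m256i neg_epi32(__m256i x) {
  return _mm256_sub_epi32(_mm256_setzero_si256(), x);
}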
/external/XNNPACK/src/qs8-vadd/gen/
minmax-avx2-mul32-ld64-x32.c
62 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
63 …vacc89ABCDEF = _mm256_sub_epi32(_mm256_sra_epi32(vacc89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89A… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
64 …vaccGHIJKLMN = _mm256_sub_epi32(_mm256_sra_epi32(vaccGHIJKLMN, vshift), _mm256_cmpgt_epi32(vremGHI… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
65 …vaccOPQRSTUV = _mm256_sub_epi32(_mm256_sra_epi32(vaccOPQRSTUV, vshift), _mm256_cmpgt_epi32(vremOPQ… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
93 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
minmax-avx2-mul32-ld64-x24.c
57 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
58 …vacc89ABCDEF = _mm256_sub_epi32(_mm256_sra_epi32(vacc89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89A… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
59 …vaccGHIJKLMN = _mm256_sub_epi32(_mm256_sra_epi32(vaccGHIJKLMN, vshift), _mm256_cmpgt_epi32(vremGHI… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
87 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
minmax-avx2-mul32-ld64-x16.c
52 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
53 …vacc89ABCDEF = _mm256_sub_epi32(_mm256_sra_epi32(vacc89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89A… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
77 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
minmax-avx2-mul32-ld64-x8.c
47 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
69 …vacc01234567 = _mm256_sub_epi32(_mm256_sra_epi32(vacc01234567, vshift), _mm256_cmpgt_epi32(vrem012… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
/external/flac/src/libFLAC/
lpc_intrin_avx2.c
93 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
124 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
155 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
182 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
211 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
234 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
257 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
276 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
297 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
312 …_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(d… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
[all …]
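
Each FLAC line above is the same store: residual[i] = data[i] - prediction[i], with only the LPC order of the predictor varying between the unrolled branches. A minimal sketch of the final step; `pred` is assumed to already hold the shifted qlp dot product:

#include <immintrin.h>
#include <stdint.h>

/* Store eight residuals: residual[i] = data[i] - pred[i]. */
static inline void store_residual_sketch(int32_t *residual,
                                         const int32_t *data,
                                         __m256i pred) {
  const __m256i d = _mm256_loadu_si256((const __m256i *)data);
  _mm256_storeu_si256((__m256i *)residual, _mm256_sub_epi32(d, pred));
}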
/external/XNNPACK/src/qs8-igemm/gen/
3x8c8-minmax-avx2.c
190 _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, … in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
192 _mm256_sub_epi32(_mm256_sra_epi32(vq31prod1x01234567, vshift), _mm256_cmpgt_epi32(vrem1x01234567, … in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
194 _mm256_sub_epi32(_mm256_sra_epi32(vq31prod2x01234567, vshift), _mm256_cmpgt_epi32(vrem2x01234567, … in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
/external/XNNPACK/src/qs8-gemm/gen/
3x8c8-minmax-avx2.c
173 _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, … in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
175 _mm256_sub_epi32(_mm256_sra_epi32(vq31prod1x01234567, vshift), _mm256_cmpgt_epi32(vrem1x01234567, … in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
177 _mm256_sub_epi32(_mm256_sra_epi32(vq31prod2x01234567, vshift), _mm256_cmpgt_epi32(vrem2x01234567, … in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
3x8c8-xw-minmax-avx2.c
169 _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, … in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
171 _mm256_sub_epi32(_mm256_sra_epi32(vq31prod1x01234567, vshift), _mm256_cmpgt_epi32(vrem1x01234567, … in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
173 _mm256_sub_epi32(_mm256_sra_epi32(vq31prod2x01234567, vshift), _mm256_cmpgt_epi32(vrem2x01234567, … in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
