
Searched refs: _mm256_permutevar8x32_epi32 (Results 1 – 25 of 42) sorted by relevance

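_mm256_permutevar8x32_epi32 is the AVX2 cross-lane dword permute (vpermd): it selects 32-bit elements of its first operand using the low three bits of each lane of the index vector and, unlike the in-lane shuffles, can move elements across the 128-bit lane boundary. A minimal standalone illustration, not taken from any of the results below (compile with -mavx2):

#include <immintrin.h>
#include <stdio.h>

/* vpermd reads only bits 2:0 of each 32-bit index lane, so any index
   vector works; here a descending index reverses the vector. */
int main(void) {
    const __m256i v   = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
    const __m256i idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);  /* reverse */
    const __m256i r   = _mm256_permutevar8x32_epi32(v, idx);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 17 16 15 14 13 12 11 10 */
    printf("\n");
    return 0;
}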

/external/XNNPACK/src/f32-velu/gen/
velu-avx2-rr1-lut8-p4-perm-x72.c
75 const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
78 const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
81 const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
84 const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
87 const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
90 const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
93 const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
96 const __m256i vl7 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn7)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
99 const __m256i vl8 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn8)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
227 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
[all …]
velu-avx2-rr1-lut8-p4-perm-x80.c
78 const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
81 const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
84 const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
87 const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
90 const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
93 const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
96 const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
99 const __m256i vl7 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn7)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
102 const __m256i vl8 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn8)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
105 const __m256i vl9 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn9)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
[all …]
velu-avx2-rr1-lut8-p4-perm-x64.c
72 const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
75 const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
78 const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
81 const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
84 const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
87 const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
90 const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
93 const __m256i vl7 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn7)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
209 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
241 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
velu-avx2-rr1-lut8-p4-perm-x48.c
66 const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
69 const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
72 const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
75 const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
78 const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
81 const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
173 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
205 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
velu-avx2-rr1-lut8-p4-perm-x56.c
69 const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
72 const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
75 const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
78 const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
81 const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
84 const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
87 const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
191 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
223 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
velu-avx2-rr1-lut8-p4-perm-x40.c
63 const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
66 const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
69 const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
72 const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
75 const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
155 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
187 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
velu-avx2-rr1-lut8-p4-perm-x32.c
60 const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
63 const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
66 const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
69 const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
137 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
169 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
velu-avx2-rr1-lut8-p4-perm-x24.c
57 const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
60 const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
63 const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
119 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
151 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
velu-avx2-rr1-lut8-p4-perm-x16.c
54 const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16()
57 const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16()
101 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16()
133 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16()
velu-avx2-rr1-lut8-p4-perm-x8.c
50 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8()
82 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8()
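The f32-velu kernels above all use the same trick: the 8-entry exponential lookup table lives in a single __m256i, and vpermd performs eight parallel table lookups keyed by the low bits of the float's integer image. A minimal sketch under that reading, with the names vtable and vn borrowed from the sources above:

#include <immintrin.h>

/* Eight parallel lookups into a 32-byte LUT held in one register.
   vpermd uses only bits 2:0 of each index lane, so the high bits of
   the float's integer image are ignored automatically. */
static inline __m256 lut8_lookup(__m256i vtable, __m256 vn) {
    const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));
    return _mm256_castsi256_ps(vl);
}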
/external/tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/
TypeCastingAVX2.h
43 return _mm256_permutevar8x32_epi32(converted, permute_mask);
62 return _mm256_permutevar8x32_epi32(converted, permute_mask);
87 return _mm256_permutevar8x32_epi32(converted, permute_mask);
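The Eigen type-casting code uses vpermd to undo lane interleaving: AVX2 pack instructions operate per 128-bit lane, so after a pack the elements are out of linear order and a fixed permute_mask restores it. A sketch of that pattern; the mask value is my reconstruction, not copied from TypeCastingAVX2.h:

#include <immintrin.h>

/* _mm256_packs_epi32(a, b) yields a0..a3 b0..b3 | a4..a7 b4..b7 as
   16-bit lanes; permuting the eight dwords with {0,1,4,5,2,3,6,7}
   restores the linear order a0..a7 b0..b7. */
static inline __m256i pack32to16_ordered(__m256i a, __m256i b) {
    const __m256i permute_mask = _mm256_setr_epi32(0, 1, 4, 5, 2, 3, 6, 7);
    const __m256i converted = _mm256_packs_epi32(a, b);  /* lane-interleaved */
    return _mm256_permutevar8x32_epi32(converted, permute_mask);
}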
/external/ruy/ruy/
kernel_avx2_fma.cc
229 _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(0)));
232 _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(1)));
235 _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(2)));
238 _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(3)));
241 _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(4)));
244 _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(5)));
247 _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(6)));
250 _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(7)));
503 results = _mm256_permutevar8x32_epi32(results, repack_perm);
916 results = _mm256_permutevar8x32_epi32(results, repack_perm);
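The ruy kernel shows a register-only broadcast: feeding vpermd a _mm256_set1_epi32(k) index replicates element k of bias_data into all eight lanes, with no round trip through memory or a general-purpose register. As a standalone helper:

#include <immintrin.h>

/* Broadcast 32-bit element k of v into all eight lanes; only bits 2:0
   of each index lane are used, so k need not be masked to 0..7. */
static inline __m256i broadcast_lane(__m256i v, int k) {
    return _mm256_permutevar8x32_epi32(v, _mm256_set1_epi32(k));
}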
/external/XNNPACK/src/f32-velu/
avx2-rr1-lut8-p4-perm.c.in
56 const __m256i vl${N} = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn${N}));
97 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));
129 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));
/external/XNNPACK/src/qs8-igemm/gen/
3x8c8-minmax-avx2.c
149 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
150 __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
151 __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask); in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
2x8c8-minmax-avx2.c
127 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
128 __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask); in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
1x8c8-minmax-avx2.c
105 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
/external/XNNPACK/src/qs8-gemm/gen/
3x8c8-minmax-avx2.c
132 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
133 __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
134 __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask); in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
3x8c8-xw-minmax-avx2.c
128 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
129 __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
130 __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask); in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
2x8c8-minmax-avx2.c
112 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
113 __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask); in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
2x8c8-xw-minmax-avx2.c
108 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
109 __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
1x8c8-minmax-avx2.c
92 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
1x8c8-xw-minmax-avx2.c
88 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
2x16c8-minmax-avx512skx.c
143 …const __m256i vout01x084C2A6E195D3B7F = _mm256_permutevar8x32_epi32(vout01x084Cx2A6Ex195Dx3B7F, _m… in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
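In these QS8 GEMM/IGEMM kernels, the horizontal reduction leaves the eight per-column accumulators in 0,2,4,6,1,3,5,7 order (hence the "02461357" in the variable names), and a single vpermd restores column order. A sketch with a reconstructed vpermute_mask:

#include <immintrin.h>

/* Map lane order 0,2,4,6,1,3,5,7 back to 0..7: output lane i takes
   input lane mask[i], so acc1 (sitting in lane 4) is fetched by
   index 4 in output position 1, and so on. */
static inline __m256i unshuffle_acc(__m256i vacc_02461357) {
    const __m256i vpermute_mask = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
    return _mm256_permutevar8x32_epi32(vacc_02461357, vpermute_mask);
}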
/external/XNNPACK/src/math/
expm1minus-avx2-rr1-lut8-p4-perm.c
69 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm()
/external/flac/src/libFLAC/
lpc_intrin_avx2.c
806 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
837 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
868 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
895 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
924 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
947 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
970 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
989 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1010 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1025 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
[all …]
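The FLAC code narrows 64-bit sums to 32 bits: after the variable right shift, each result occupies the low dword of its qword, and vpermd gathers those four low dwords into the bottom half of the register for a 128-bit store. A sketch assuming that reading of lpc_intrin_avx2.c; the pack mask is my reconstruction:

#include <immintrin.h>

/* Select the even dwords (the low half of each 64-bit lane) into
   lanes 0..3, then drop to the low 128 bits. */
static inline __m128i narrow64to32(__m256i summ64, __m128i cnt) {
    const __m256i pack = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7);
    const __m256i shifted = _mm256_srl_epi64(summ64, cnt);
    return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(shifted, pack));
}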
