/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx2-rr1-lut8-p4-perm-x72.c |
     75  const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
     78  const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
     81  const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
     84  const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
     87  const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
     90  const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
     93  const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
     96  const __m256i vl7 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn7));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
     99  const __m256i vl8 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn8));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
    227  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
    [all …]
|
D | velu-avx2-rr1-lut8-p4-perm-x80.c |
     78  const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
     81  const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
     84  const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
     87  const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
     90  const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
     93  const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
     96  const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
     99  const __m256i vl7 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn7));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
    102  const __m256i vl8 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn8));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
    105  const __m256i vl9 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn9));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
    [all …]
|
D | velu-avx2-rr1-lut8-p4-perm-x64.c |
     72  const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
     75  const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
     78  const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
     81  const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
     84  const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
     87  const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
     90  const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
     93  const __m256i vl7 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn7));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
    209  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
    241  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
|
D | velu-avx2-rr1-lut8-p4-perm-x48.c |
     66  const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
     69  const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
     72  const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
     75  const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
     78  const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
     81  const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
    173  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
    205  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
|
D | velu-avx2-rr1-lut8-p4-perm-x56.c |
     69  const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
     72  const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
     75  const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
     78  const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
     81  const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
     84  const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
     87  const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
    191  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
    223  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
|
D | velu-avx2-rr1-lut8-p4-perm-x40.c |
     63  const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
     66  const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
     69  const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
     72  const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
     75  const __m256i vl4 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn4));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
    155  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
    187  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
|
D | velu-avx2-rr1-lut8-p4-perm-x32.c |
     60  const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
     63  const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
     66  const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
     69  const __m256i vl3 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn3));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
    137  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
    169  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
|
D | velu-avx2-rr1-lut8-p4-perm-x24.c |
     57  const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
     60  const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
     63  const __m256i vl2 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn2));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
    119  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
    151  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24()
|
D | velu-avx2-rr1-lut8-p4-perm-x16.c |
     54  const __m256i vl0 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn0));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16()
     57  const __m256i vl1 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn1));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16()
    101  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16()
    133  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16()
|
D | velu-avx2-rr1-lut8-p4-perm-x8.c |
     50  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8()
     82  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8()
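Note on the f32-velu hits above: these kernels use vpermd as an eight-entry lookup table. As I read the rr1-lut8 scheme, vtable holds eight precomputed table values (powers 2^(j/8)) and the low bits of vn, produced by the magic-bias exponent reduction, carry the table index; _mm256_permutevar8x32_epi32 consults only the low 3 bits of each 32-bit index, so no extra masking is needed. A minimal stand-alone sketch of that lookup behavior (hypothetical values, not XNNPACK code; compile with -mavx2):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      /* Eight-entry table; any 32-bit index selects entry (idx & 7). */
      const __m256i table = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
      /* Indices with garbage in the upper bits: only the low 3 bits matter. */
      const __m256i idx = _mm256_setr_epi32(0, 9, 2, 0x7FFFFFF3, -4, 5, 14, 7);
      const __m256i v = _mm256_permutevar8x32_epi32(table, idx);
      int out[8];
      _mm256_storeu_si256((__m256i *) out, v);
      for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* prints 10 11 12 13 14 15 16 17 */
      printf("\n");
      return 0;
    }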
|
/external/tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/ |
D | TypeCastingAVX2.h |
     43  return _mm256_permutevar8x32_epi32(converted, permute_mask);
     62  return _mm256_permutevar8x32_epi32(converted, permute_mask);
     87  return _mm256_permutevar8x32_epi32(converted, permute_mask);
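Note on the TypeCastingAVX2.h hits: the surrounding conversions appear to narrow with _mm256_packs_epi32/_mm256_packs_epi16, which operate within each 128-bit lane, so the final vpermd with a fixed permute_mask restores linear element order. A sketch of that pack-then-permute pattern (my reconstruction of the idea, with the mask derived by hand rather than copied from Eigen):

    #include <immintrin.h>

    /* Packs a0..a7 and b0..b7 (int32) to int16 in natural order.
       _mm256_packs_epi32 yields a0..a3 b0..b3 | a4..a7 b4..b7 per lane,
       so a dword permute of 0 1 4 5 2 3 6 7 restores a0..a7 b0..b7. */
    static inline __m256i pack_i32_to_i16_ordered(__m256i a, __m256i b) {
      const __m256i converted = _mm256_packs_epi32(a, b);
      const __m256i permute_mask = _mm256_setr_epi32(0, 1, 4, 5, 2, 3, 6, 7);
      return _mm256_permutevar8x32_epi32(converted, permute_mask);
    }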
|
/external/ruy/ruy/ |
D | kernel_avx2_fma.cc |
    229  _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(0)));
    232  _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(1)));
    235  _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(2)));
    238  _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(3)));
    241  _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(4)));
    244  _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(5)));
    247  _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(6)));
    250  _mm256_permutevar8x32_epi32(bias_data, _mm256_set1_epi32(7)));
    503  results = _mm256_permutevar8x32_epi32(results, repack_perm);
    916  results = _mm256_permutevar8x32_epi32(results, repack_perm);
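Note on the kernel_avx2_fma.cc hits: an index vector built with _mm256_set1_epi32(k) turns vpermd into a cross-lane broadcast of element k, which is how lines 229-250 appear to splat one bias value per destination column; the repack_perm permutes at lines 503 and 916 look like a lane-reordering fixup after packing. A sketch of the broadcast idiom (assumed helper name, not the ruy code itself):

    #include <immintrin.h>

    /* Broadcast 32-bit element `lane` (0..7) of v to all eight lanes. */
    static inline __m256i broadcast_lane_epi32(__m256i v, int lane) {
      return _mm256_permutevar8x32_epi32(v, _mm256_set1_epi32(lane));
    }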
|
/external/XNNPACK/src/f32-velu/ |
D | avx2-rr1-lut8-p4-perm.c.in |
     56  const __m256i vl${N} = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn${N}));
     97  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));
    129  const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c8-minmax-avx2.c |
    149  __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
    150  __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
    151  __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2()
|
D | 2x8c8-minmax-avx2.c |
    127  __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
    128  __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2()
|
D | 1x8c8-minmax-avx2.c | 105 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c8-minmax-avx2.c |
    132  __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
    133  __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
    134  __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2()
|
D | 3x8c8-xw-minmax-avx2.c |
    128  __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);  in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
    129  __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);  in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
    130  __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask);  in xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2()
|
D | 2x8c8-minmax-avx2.c |
    112  __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
    113  __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2()
|
D | 2x8c8-xw-minmax-avx2.c |
    108  __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);  in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
    109  __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);  in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
|
D | 1x8c8-minmax-avx2.c | 92 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
|
D | 1x8c8-xw-minmax-avx2.c | 88 __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
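Note on the qs8-gemm and qs8-igemm 8c8 hits above: the variable names (vacc0x02461357 becoming vacc0x01234567) indicate that the horizontal-add reduction leaves the eight column sums in order 0 2 4 6 1 3 5 7, and a single vpermd puts them back into 0..7. A sketch of that repack step (mask reconstructed from the names, not copied from the kernels):

    #include <immintrin.h>

    static inline __m256i repack_02461357_to_01234567(__m256i acc) {
      /* Element i of the mask names the source position of column i. */
      const __m256i vpermute_mask = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
      return _mm256_permutevar8x32_epi32(acc, vpermute_mask);
    }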
|
D | 2x16c8-minmax-avx512skx.c | 143 …const __m256i vout01x084C2A6E195D3B7F = _mm256_permutevar8x32_epi32(vout01x084Cx2A6Ex195Dx3B7F, _m… in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
|
/external/XNNPACK/src/math/ |
D | expm1minus-avx2-rr1-lut8-p4-perm.c | 69 const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn)); in xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm()
|
/external/flac/src/libFLAC/ |
D | lpc_intrin_avx2.c |
     806  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
     837  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
     868  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
     895  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
     924  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
     947  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
     970  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
     989  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
    1010  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
    1025  summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);  in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
    [all …]
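Note on the lpc_intrin_avx2.c hits: summ appears to hold four 64-bit prediction sums; after the logical right shift by the quantization level (cnt), only the low 32 bits of each 64-bit lane are kept, and vpermd with the pack index vector gathers those dwords (positions 0, 2, 4, 6) into the low half of the register. A sketch of that narrowing step under those assumptions (reconstruction, not the libFLAC source):

    #include <immintrin.h>

    /* Shift four int64 lanes right by `cnt` bits, then compress the low
       32 bits of each lane into a single __m128i. */
    static inline __m128i shift_and_narrow_epi64(__m256i summ, __m128i cnt) {
      const __m256i pack = _mm256_setr_epi32(0, 2, 4, 6, 0, 2, 4, 6);  /* upper half unused */
      const __m256i shifted = _mm256_srl_epi64(summ, cnt);
      return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(shifted, pack));
    }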
|