/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx512f-rr1-lut16-p3-perm-x128.c | 71 const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 73 const __m512i ven1 = _mm512_slli_epi32(_mm512_castps_si512(vn1), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 75 const __m512i ven2 = _mm512_slli_epi32(_mm512_castps_si512(vn2), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 77 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 79 const __m512i ven4 = _mm512_slli_epi32(_mm512_castps_si512(vn4), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 81 const __m512i ven5 = _mm512_slli_epi32(_mm512_castps_si512(vn5), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 83 const __m512i ven6 = _mm512_slli_epi32(_mm512_castps_si512(vn6), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 85 const __m512i ven7 = _mm512_slli_epi32(_mm512_castps_si512(vn7), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 203 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() 236 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
|
D | velu-avx512f-rr1-lut16-p3-perm-x112.c | 68 const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() 70 const __m512i ven1 = _mm512_slli_epi32(_mm512_castps_si512(vn1), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() 72 const __m512i ven2 = _mm512_slli_epi32(_mm512_castps_si512(vn2), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() 74 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() 76 const __m512i ven4 = _mm512_slli_epi32(_mm512_castps_si512(vn4), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() 78 const __m512i ven5 = _mm512_slli_epi32(_mm512_castps_si512(vn5), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() 80 const __m512i ven6 = _mm512_slli_epi32(_mm512_castps_si512(vn6), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() 186 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() 219 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
|
D | velu-avx512f-rr1-p6-x128.c | 71 __m512 vs0 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn0), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 73 __m512 vs1 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn1), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 75 __m512 vs2 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn2), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 77 __m512 vs3 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn3), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 79 __m512 vs4 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn4), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 81 __m512 vs5 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn5), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 83 __m512 vs6 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn6), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 85 __m512 vs7 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn7), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 213 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 247 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128()
|
D | velu-avx512f-rr1-lut16-p3-perm-x80.c | 62 const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() 64 const __m512i ven1 = _mm512_slli_epi32(_mm512_castps_si512(vn1), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() 66 const __m512i ven2 = _mm512_slli_epi32(_mm512_castps_si512(vn2), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() 68 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() 70 const __m512i ven4 = _mm512_slli_epi32(_mm512_castps_si512(vn4), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() 152 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() 185 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
|
D | velu-avx512f-rr1-lut16-p3-perm-x96.c | 65 const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() 67 const __m512i ven1 = _mm512_slli_epi32(_mm512_castps_si512(vn1), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() 69 const __m512i ven2 = _mm512_slli_epi32(_mm512_castps_si512(vn2), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() 71 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() 73 const __m512i ven4 = _mm512_slli_epi32(_mm512_castps_si512(vn4), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() 75 const __m512i ven5 = _mm512_slli_epi32(_mm512_castps_si512(vn5), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() 169 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() 202 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
|
D | velu-avx512f-rr1-p6-x96.c | 65 __m512 vs0 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn0), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() 67 __m512 vs1 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn1), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() 69 __m512 vs2 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn2), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() 71 __m512 vs3 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn3), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() 73 __m512 vs4 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn4), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() 75 __m512 vs5 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn5), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() 177 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() 211 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
|
D | velu-avx512f-rr1-p6-x112.c | 68 __m512 vs0 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn0), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() 70 __m512 vs1 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn1), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() 72 __m512 vs2 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn2), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() 74 __m512 vs3 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn3), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() 76 __m512 vs4 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn4), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() 78 __m512 vs5 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn5), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() 80 __m512 vs6 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn6), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() 195 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() 229 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112()
|
D | velu-avx512f-rr1-p6-x80.c | 62 __m512 vs0 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn0), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() 64 __m512 vs1 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn1), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() 66 __m512 vs2 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn2), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() 68 __m512 vs3 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn3), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() 70 __m512 vs4 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn4), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() 159 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() 193 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
|
D | velu-avx512f-rr1-p6-x64.c | 59 __m512 vs0 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn0), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64() 61 __m512 vs1 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn1), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64() 63 __m512 vs2 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn2), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64() 65 __m512 vs3 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn3), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64() 141 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64() 175 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64()
|
D | velu-avx512f-rr1-lut16-p3-perm-x64.c | 59 const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64() 61 const __m512i ven1 = _mm512_slli_epi32(_mm512_castps_si512(vn1), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64() 63 const __m512i ven2 = _mm512_slli_epi32(_mm512_castps_si512(vn2), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64() 65 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64() 135 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64() 168 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
|
D | velu-avx512f-rr1-lut16-p3-perm-x48.c | 56 const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48() 58 const __m512i ven1 = _mm512_slli_epi32(_mm512_castps_si512(vn1), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48() 60 const __m512i ven2 = _mm512_slli_epi32(_mm512_castps_si512(vn2), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48() 118 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48() 151 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48()
|
D | velu-avx512f-rr1-p6-x48.c | 56 __m512 vs0 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn0), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48() 58 __m512 vs1 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn1), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48() 60 __m512 vs2 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn2), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48() 123 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48() 157 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x48()
|
D | velu-avx512f-rr1-lut16-p3-perm-x32.c | 53 const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32() 55 const __m512i ven1 = _mm512_slli_epi32(_mm512_castps_si512(vn1), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32() 101 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32() 134 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32()
|
D | velu-avx512f-rr1-p6-x32.c | 53 __m512 vs0 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn0), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32() 55 __m512 vs1 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn1), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32() 105 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32() 139 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32()
|
D | velu-avx512f-rr1-lut16-p3-perm-x16.c | 50 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16() 83 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
|
D | velu-avx512f-rr1-p6-x16.c | 50 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x16() 84 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x16()
|
/external/llvm-project/clang/lib/Headers/ |
D | avx512bf16intrin.h | 237 return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( in _mm512_cvtpbh_ps() 253 return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( in _mm512_maskz_cvtpbh_ps()
|
/external/XNNPACK/src/f32-velu/ |
D | avx512f-rr1-lut16-p3-perm.c.in | 55 const __m512i ven${N} = _mm512_slli_epi32(_mm512_castps_si512(vn${N}), 19); 99 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); 132 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19);
|
D | avx512f-rr1-p6.c.in | 55 __m512 vs${N} = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn${N}), 23)); 104 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); 138 __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23));
|
/external/XNNPACK/src/math/ |
D | expm1minus-avx512f-rr1-lut16-p3-perm.c | 67 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm()
|
D | expm1minus-avx512f-rr1-p6.c | 60 const __m512 vs = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn), 23)); in xnn_math_f32_expm1minus__avx512f_rr1_p6()
|
D | exp-avx512f-rr2-lut32-p2-perm2.c | 71 __m512i veo = _mm512_slli_epi32(_mm512_and_si512(_mm512_castps_si512(vn), vmantissa_mask), 18); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2()
|
D | exp-avx512f-rr2-p5.c | 59 __m512i veo = _mm512_slli_epi32(_mm512_castps_si512(vn), 23); in xnn_math_f32_exp__avx512f_rr2_p5()
|
D | exp-avx512f-rr2-lut16-p3-perm.c | 66 __m512i veo = _mm512_slli_epi32(_mm512_and_si512(_mm512_castps_si512(vn), vmantissa_mask), 19); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm()
|
/external/tensorflow/tensorflow/core/kernels/ |
D | sparse_matmul_op.h | 451 return _mm512_castsi512_ps(_mm512_slli_epi32( in pexpand_bf16_l() 460 return _mm512_castsi512_ps(_mm512_slli_epi32( in pexpand_bf16_u()
|