/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local 172 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 173 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 174 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 175 __m512 vf3 = _mm512_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 176 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 177 __m512 vf5 = _mm512_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 178 __m512 vf6 = _mm512_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 179 __m512 vf7 = _mm512_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 180 __m512 vf8 = _mm512_mul_ps(vp8, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x176.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() local 163 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 164 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 165 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 166 __m512 vf3 = _mm512_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 167 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 168 __m512 vf5 = _mm512_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 169 __m512 vf6 = _mm512_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 170 __m512 vf7 = _mm512_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 171 __m512 vf8 = _mm512_mul_ps(vp8, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() [all …]
|
D | avx512f-p5-scalef-x160.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() local 154 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 155 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 156 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 157 __m512 vf3 = _mm512_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 158 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 159 __m512 vf5 = _mm512_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 160 __m512 vf6 = _mm512_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 161 __m512 vf7 = _mm512_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 162 __m512 vf8 = _mm512_mul_ps(vp8, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x144.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() local 145 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 146 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 147 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 148 __m512 vf3 = _mm512_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 149 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 150 __m512 vf5 = _mm512_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 151 __m512 vf6 = _mm512_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 152 __m512 vf7 = _mm512_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 153 __m512 vf8 = _mm512_mul_ps(vp8, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() [all …]
|
D | avx512f-p5-scalef-x128.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() local 136 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 137 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 138 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 139 __m512 vf3 = _mm512_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 140 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 141 __m512 vf5 = _mm512_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 142 __m512 vf6 = _mm512_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 143 __m512 vf7 = _mm512_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 198 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() [all …]
|
D | avx512f-p5-scalef-x112.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() local 127 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() 128 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() 129 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() 130 __m512 vf3 = _mm512_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() 131 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() 132 __m512 vf5 = _mm512_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() 133 __m512 vf6 = _mm512_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() 185 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() 219 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
|
D | avx2-p5-x96.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() local 178 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 179 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 180 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 181 __m256 vf3 = _mm256_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 182 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 183 __m256 vf5 = _mm256_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 184 __m256 vf6 = _mm256_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 185 __m256 vf7 = _mm256_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 186 __m256 vf8 = _mm256_mul_ps(vp8, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() [all …]
|
D | avx512f-p5-scalef-x96.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() local 118 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() 119 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() 120 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() 121 __m512 vf3 = _mm512_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() 122 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() 123 __m512 vf5 = _mm512_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() 172 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() 206 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
|
D | avx2-p5-x88.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() local 169 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 170 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 171 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 172 __m256 vf3 = _mm256_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 173 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 174 __m256 vf5 = _mm256_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 175 __m256 vf6 = _mm256_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 176 __m256 vf7 = _mm256_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 177 __m256 vf8 = _mm256_mul_ps(vp8, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() [all …]
|
D | avx512f-p5-scalef-x80.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() local 109 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() 110 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() 111 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() 112 __m512 vf3 = _mm512_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() 113 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() 159 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() 193 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
|
D | avx2-p5-x80.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() local 160 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 161 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 162 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 163 __m256 vf3 = _mm256_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 164 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 165 __m256 vf5 = _mm256_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 166 __m256 vf6 = _mm256_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 167 __m256 vf7 = _mm256_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 168 __m256 vf8 = _mm256_mul_ps(vp8, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() [all …]
|
D | avx2-p5-x72.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() local 151 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 152 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 153 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 154 __m256 vf3 = _mm256_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 155 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 156 __m256 vf5 = _mm256_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 157 __m256 vf6 = _mm256_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 158 __m256 vf7 = _mm256_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 159 __m256 vf8 = _mm256_mul_ps(vp8, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() [all …]
|
D | avx2-p5-x56.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() local 133 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 134 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 135 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 136 __m256 vf3 = _mm256_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 137 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 138 __m256 vf5 = _mm256_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 139 __m256 vf6 = _mm256_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 213 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 253 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
|
D | avx512f-p5-scalef-x64.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64() local 100 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64() 101 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64() 102 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64() 103 __m512 vf3 = _mm512_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64() 146 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64() 180 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64()
|
D | avx2-p5-x64.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() local 142 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 143 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 144 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 145 __m256 vf3 = _mm256_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 146 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 147 __m256 vf5 = _mm256_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 148 __m256 vf6 = _mm256_mul_ps(vp6, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 149 __m256 vf7 = _mm256_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 228 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() [all …]
|
D | avx2-p5-x48.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() local 124 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 125 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 126 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 127 __m256 vf3 = _mm256_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 128 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 129 __m256 vf5 = _mm256_mul_ps(vp5, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 198 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 238 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
|
D | avx512f-p5-scalef-x48.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x48() local 91 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x48() 92 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x48() 93 __m512 vf2 = _mm512_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x48() 133 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x48() 167 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x48()
|
D | avx2-p5-x40.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() local 115 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 116 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 117 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 118 __m256 vf3 = _mm256_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 119 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 183 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 223 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
|
D | avx2-p5-x32.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32() local 106 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32() 107 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32() 108 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32() 109 __m256 vf3 = _mm256_mul_ps(vp3, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32() 168 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32() 208 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32()
|
D | avx512f-p5-scalef-x32.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() local 82 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 83 __m512 vf1 = _mm512_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 120 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 154 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32()
|
D | avx2-p5-x24.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x24() local 97 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x24() 98 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x24() 99 __m256 vf2 = _mm256_mul_ps(vp2, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x24() 153 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x24() 193 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x24()
|
D | avx512f-p5-scalef-x16.c | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() local 73 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 107 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 141 __m512 vf = _mm512_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16()
|
D | avx2-p5-x16.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x16() local 88 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x16() 89 __m256 vf1 = _mm256_mul_ps(vp1, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x16() 138 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x16() 178 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x16()
|
D | avx2-p5-x8.c | 45 const __m256 vscalev = _mm256_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8() local 79 __m256 vf0 = _mm256_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8() 123 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8() 163 __m256 vf = _mm256_mul_ps(vp, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8()
|
/external/XNNPACK/src/f32-vscaleextexp/ |
D | avx512f-p5-scalef.c.in | 39 const __m512 vscalev = _mm512_set1_ps(scale_value); variable 84 __m512 vf${N} = _mm512_mul_ps(vp${N}, vscalev); 121 __m512 vf = _mm512_mul_ps(vp, vscalev); 155 __m512 vf = _mm512_mul_ps(vp, vscalev);
|