/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x128-acc2.c | 42 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() local 142 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 151 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 174 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 179 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 181 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_acce01); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
|
D | avx512f-p5-scalef-x128-acc4.c | 44 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() local 146 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() 155 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() 182 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() 189 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() 193 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_acce0123); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
|
D | avx512f-p5-scalef-x160-acc2.c | 42 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() local 160 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() 171 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() 198 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() 203 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() 205 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_acce01); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
|
D | avx512f-p5-scalef-x144-acc3.c | 43 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() local 153 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() 163 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() 190 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() 196 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() 200 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_acce012); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
|
D | avx512f-p5-scalef-x128.c | 41 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() local 140 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() 149 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() 170 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() 175 __m512 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
|
D | avx512f-p5-scalef-x192-acc2.c | 42 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local 178 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 191 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 222 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 227 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 229 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_acce01); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x160-acc5.c | 45 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() local 166 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 177 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 210 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 218 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 224 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_acce01234); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
|
D | avx2-p5-x64-acc2.c | 46 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() local 146 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() 159 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() 198 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() 203 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() 205 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_acce01), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
|
D | avx512f-p5-scalef-x144.c | 41 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() local 149 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() 159 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() 182 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() 187 __m512 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
|
D | avx2-p5-x64-acc4.c | 48 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() local 150 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 163 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 208 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 215 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 219 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_acce0123), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
|
D | avx512f-p5-scalef-x192-acc3.c | 43 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local 180 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 193 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 226 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 232 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 236 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_acce012); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx2-p5-x80-acc2.c | 46 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() local 164 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 179 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 224 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 229 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 231 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_acce01), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
|
D | avx512f-p5-scalef-x192-acc6.c | 46 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() local 186 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 199 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 238 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 247 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 253 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_acce012345); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx2-p5-x72-acc3.c | 47 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() local 157 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 171 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 216 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 222 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 226 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_acce012), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
|
D | avx512f-p5-scalef-x160.c | 41 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160() local 158 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160() 169 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160() 194 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160() 199 __m512 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
|
D | avx2-p5-x64.c | 45 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x64() local 144 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x64() 157 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64() 193 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x64() 198 __m256 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
|
D | avx2-p5-x72.c | 45 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x72() local 153 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x72() 167 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72() 206 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x72() 211 __m256 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
|
D | avx512f-p5-scalef-x192.c | 41 __m512 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local 176 __m512 vmax_e0 = _mm512_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 189 const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 218 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 223 __m512 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx2-p5-x96-acc3.c | 47 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() local 184 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 201 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 255 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 261 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 265 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_acce012), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc2.c | 46 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() local 182 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 199 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 250 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 255 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 257 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_acce01), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x80-acc5.c | 49 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() local 170 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 185 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 239 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 247 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 253 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_acce01234), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
|
D | avx2-p5-x80.c | 45 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x80() local 162 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x80() 177 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80() 219 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x80() 224 __m256 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
|
D | avx2-p5-x96-acc6.c | 50 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() local 190 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 207 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 270 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 279 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 285 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_acce012345), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
|
D | avx2-p5-x96.c | 45 __m256 vacce0 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x96() local 180 __m256 vmax_e0 = _mm256_max_ps(vacce0, vn0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 197 const __m256 vdelta_acce0 = _mm256_max_ps(_mm256_sub_ps(vacce0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 245 vacce0 = vmax_e0; in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 250 __m256 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-raddextexp/ |
D | avx512f-p5-scalef.c.in | 133 __m512 vacce = vacce0;
|