/external/XNNPACK/src/math/
D  exp-avx-rr2-p5.c
     62  __m128i ven_lo = _mm_max_epi16(veo_lo, vmin_exponent);                             in xnn_math_f32_exp__avx_rr2_p5() local
     64  ven_lo = _mm_min_epi16(ven_lo, vmax_exponent);                                     in xnn_math_f32_exp__avx_rr2_p5()
     66  veo_lo = _mm_sub_epi32(veo_lo, ven_lo);                                            in xnn_math_f32_exp__avx_rr2_p5()
     68  const __m128 vsn_lo = _mm_castsi128_ps(_mm_add_epi32(ven_lo, vdefault_exponent));  in xnn_math_f32_exp__avx_rr2_p5()

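The four hits above are one computation: the low 128-bit half of a two-factor scaling step that keeps 2**n representable when n alone would overflow the exponent field. A minimal sketch of the idea is below; the helper name xnn_scale_lo and the clamp constants are assumptions (the listing does not show the kernel's actual bounds), and the high half is computed the same way.

    #include <immintrin.h>

    /* Sketch of exp-avx-rr2-p5.c lines 62-68, low half only. veo_lo holds the
     * target exponent pre-shifted into the float exponent field (bits 30:23).
     * The clamp bounds and bias are assumed, not verified constants. */
    static inline __m128 xnn_scale_lo(__m128i veo_lo, __m128 *vso_lo) {
      const __m128i vmin_exponent = _mm_set1_epi32((int) 0xC1000000);  /* assumed: -126 << 23 */
      const __m128i vmax_exponent = _mm_set1_epi32(0x3F800000);        /* assumed: 127 << 23 */
      const __m128i vdefault_exponent = _mm_set1_epi32(0x3F800000);    /* bit pattern of 1.0f */

      /* 16-bit signed min/max are safe here: the low 16 bits of every lane
       * are zero (values are multiples of 1 << 23), so the compare is
       * decided entirely by the high 16-bit lanes. */
      __m128i ven_lo = _mm_max_epi16(veo_lo, vmin_exponent);
      ven_lo = _mm_min_epi16(ven_lo, vmax_exponent);
      /* Whatever the clamp removed stays in veo_lo and becomes a second factor. */
      veo_lo = _mm_sub_epi32(veo_lo, ven_lo);

      /* Adding the bits of 1.0f turns an exponent field n into the float 2**n. */
      *vso_lo = _mm_castsi128_ps(_mm_add_epi32(veo_lo, vdefault_exponent));
      return _mm_castsi128_ps(_mm_add_epi32(ven_lo, vdefault_exponent));  /* vsn_lo */
    }
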
D  expm1minus-avx-rr2-lut4-p4-perm.c
     71  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm() local
     78  …const __m256 vs = _mm256_mul_ps(vl, _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1…  in xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm()

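Both truncated hits belong to one AVX workaround: AVX1 has no 256-bit integer shift, so the kernel splits ven into 128-bit halves, shifts each half into the exponent field, and stitches the result back with _mm256_insertf128_ps. A sketch of the whole pattern follows; the shift count 21 is an assumption (23 minus log2 of the 4-entry table, by analogy with the 19 used in the lut16 variant below; the listing truncates the real value), and scale_from_ven is a hypothetical name.

    #include <immintrin.h>

    /* Reassembly pattern behind lines 71/78 of expm1minus-avx-rr2-lut4-p4-perm.c.
     * ven carries the integer part of n above the 2 LUT index bits; shifting it
     * into the exponent field (bits 30:23) turns each lane into a power of two. */
    static inline __m256 scale_from_ven(__m256 ven, __m256 vl) {
      const __m128 ven_lo = _mm_castsi128_ps(
          _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)), 21));  /* assumed shift */
      const __m128 ven_hi = _mm_castsi128_ps(
          _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, 1)), 21));
      /* vs = vl * 2**n: multiply the LUT value by the reassembled scale. */
      return _mm256_mul_ps(vl,
          _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1));
    }
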
D  expm1minus-avx-rr2-lut16-p3.c
     68  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local
    101  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()

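The lut16 variant uses the same per-half split but skips the multiply: adding the shifted exponent bits to the bit pattern of the looked-up value vl_lo is an integer-only ldexp. A minimal sketch of the low half, reassembled from the two hits above (vs_lo_from is a hypothetical name; the high half is identical starting from _mm256_extractf128_ps(vn, 1)):

    #include <immintrin.h>

    /* Lines 68/101 of expm1minus-avx-rr2-lut16-p3.c, low 128-bit half only.
     * vn's low 4 bits index the 16-entry LUT; the bits above them hold the
     * integer part of n, so shifting left by 19 = 23 - log2(16) lands them
     * in the float exponent field. */
    static inline __m128 vs_lo_from(__m256 vn, __m128i vl_lo) {
      const __m128i ven_lo =
          _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);
      /* Integer add folds 2**n into the exponent of the LUT value vl_lo. */
      return _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));
    }

The generated f32-velu kernels and templates indexed below repeat these same two patterns at every unroll width.
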
/external/XNNPACK/src/f32-velu/gen/
D  velu-avx-rr2-lut4-p4-perm-x8.c
     55  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local
     60  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
     91  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local
     96  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()

D  velu-avx-rr2-lut4-p4-perm-x16.c
    117  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16() local
    122  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()
    153  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16() local
    158  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()

D  velu-avx-rr2-lut16-p3-x8.c
     80  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
     87  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
    146  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
    153  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()

D  velu-avx-rr2-lut4-p4-perm-x24.c
    139  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24() local
    144  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()
    175  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24() local
    180  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()

D  velu-avx-rr2-lut4-p4-perm-x32.c
    161  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32() local
    166  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
    197  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32() local
    202  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()

D  velu-avx-rr2-lut4-p4-perm-x40.c
    183  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40() local
    188  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
    219  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40() local
    224  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()

D  velu-avx-rr2-lut4-p4-perm-x48.c
    205  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48() local
    210  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
    241  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48() local
    246  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()

D  velu-avx-rr2-lut16-p3-x16.c
    195  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
    202  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
    261  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
    268  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()

D  velu-avx-rr2-lut16-p3-x24.c
    244  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
    251  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
    310  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
    317  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()

D  velu-avx-rr2-lut16-p3-x32.c
    293  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local
    300  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
    359  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local
    366  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()

D  velu-avx-rr2-lut16-p3-x40.c
    342  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local
    349  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
    408  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local
    415  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()

D  velu-avx-rr2-lut16-p3-x48.c
    391  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local
    398  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
    457  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local
    464  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));                      in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()

/external/XNNPACK/src/f32-velu/
D  avx-rr2-lut4-p4-perm.c.in
    112  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  variable
    117  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);
    148  …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)…  variable
    153  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);

D  avx-rr2-lut16-p3.c.in
    162  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);
    169  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));
    228  const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);
    235  const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));