Home
last modified time | relevance | path

Searched refs:ven_lo (Results 1 – 17 of 17) sorted by relevance

/external/XNNPACK/src/math/
Dexp-avx-rr2-p5.c62 __m128i ven_lo = _mm_max_epi16(veo_lo, vmin_exponent); in xnn_math_f32_exp__avx_rr2_p5() local
64 ven_lo = _mm_min_epi16(ven_lo, vmax_exponent); in xnn_math_f32_exp__avx_rr2_p5()
66 veo_lo = _mm_sub_epi32(veo_lo, ven_lo); in xnn_math_f32_exp__avx_rr2_p5()
68 const __m128 vsn_lo = _mm_castsi128_ps(_mm_add_epi32(ven_lo, vdefault_exponent)); in xnn_math_f32_exp__avx_rr2_p5()
Dexpm1minus-avx-rr2-lut4-p4-perm.c71 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm() local
78 …const __m256 vs = _mm256_mul_ps(vl, _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1… in xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm()
Dexpm1minus-avx-rr2-lut16-p3.c68 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local
101 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-avx-rr2-lut4-p4-perm-x8.c55 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local
60 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
91 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local
96 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
Dvelu-avx-rr2-lut4-p4-perm-x16.c117 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16() local
122 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()
153 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16() local
158 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()
Dvelu-avx-rr2-lut16-p3-x8.c80 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
87 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
146 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
153 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
Dvelu-avx-rr2-lut4-p4-perm-x24.c139 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24() local
144 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()
175 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24() local
180 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()
Dvelu-avx-rr2-lut4-p4-perm-x32.c161 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32() local
166 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
197 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32() local
202 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
Dvelu-avx-rr2-lut4-p4-perm-x40.c183 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40() local
188 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
219 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40() local
224 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
Dvelu-avx-rr2-lut4-p4-perm-x48.c205 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48() local
210 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
241 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48() local
246 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
Dvelu-avx-rr2-lut16-p3-x16.c195 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
202 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
261 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
268 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
Dvelu-avx-rr2-lut16-p3-x24.c244 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
251 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
310 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
317 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
Dvelu-avx-rr2-lut16-p3-x32.c293 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local
300 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
359 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local
366 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
Dvelu-avx-rr2-lut16-p3-x40.c342 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local
349 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
408 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local
415 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
Dvelu-avx-rr2-lut16-p3-x48.c391 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local
398 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
457 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local
464 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
/external/XNNPACK/src/f32-velu/
Davx-rr2-lut4-p4-perm.c.in112 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… variable
117 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);
148 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… variable
153 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);
Davx-rr2-lut16-p3.c.in162 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);
169 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));
228 const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);
235 const __m128 vs_lo = _mm_castsi128_ps(_mm_add_epi32(vl_lo, ven_lo));