/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx512f-rr1-lut16-p3-perm-x112.c | 80 const __m512i ven6 = _mm512_slli_epi32(_mm512_castps_si512(vn6), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() local 95 __m512 vs6 = _mm512_castsi512_ps(_mm512_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
|
D | velu-avx2-rr1-lut4-p4-perm-x56.c | 87 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56() local 103 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56()
|
D | velu-avx2-rr1-lut16-p3-gather-x56.c | 95 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56() local 110 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
|
D | velu-avx512f-rr1-lut16-p3-perm-x128.c | 83 const __m512i ven6 = _mm512_slli_epi32(_mm512_castps_si512(vn6), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() local 100 __m512 vs6 = _mm512_castsi512_ps(_mm512_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
|
D | velu-avx2-rr1-lut8-p4-perm-x56.c | 86 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56() local 102 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
|
D | velu-avx2-rr1-lut4-p4-perm-x64.c | 90 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64() local 109 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64()
|
D | velu-avx2-rr1-lut16-p3-gather-x64.c | 100 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64() local 117 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
|
D | velu-avx2-rr1-lut8-p4-perm-x64.c | 89 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64() local 108 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
|
D | velu-avx2-rr1-lut4-p4-perm-x72.c | 93 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() local 115 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
|
D | velu-avx2-rr1-lut16-p3-gather-x72.c | 105 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() local 124 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
|
D | velu-avx2-rr1-lut8-p4-perm-x72.c | 92 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() local 114 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
|
D | velu-avx2-rr1-lut8-p4-perm-x80.c | 95 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80() local 120 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
|
D | velu-avx2-rr1-lut16-p3-gather-x80.c | 110 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() local 131 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
|
D | velu-avx2-rr1-lut4-p4-perm-x80.c | 96 const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() local 121 __m256 vs6 = _mm256_castsi256_ps(_mm256_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80()
|