/external/XNNPACK/src/f32-velu/gen/
D | velu-avx-rr2-lut16-p3-x8.c |
     59  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
     63  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
     67  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
    125  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
    129  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
    133  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
|
D | velu-avx-rr2-lut16-p3-x16.c |
    174  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
    178  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
    182  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
    240  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
    244  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
    248  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
|
D | velu-avx-rr2-lut16-p3-x24.c |
    223  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
    227  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
    231  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
    289  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
    293  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
    297  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
|
D | velu-avx-rr2-lut16-p3-x32.c |
    272  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local
    276  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
    280  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
    338  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local
    342  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
    346  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
|
D | velu-avx-rr2-lut16-p3-x40.c |
    321  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local
    325  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
    329  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
    387  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local
    391  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
    395  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
|
D | velu-avx-rr2-lut16-p3-x48.c |
    370  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local
    374  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
    378  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
    436  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local
    440  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
    444  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
|
/external/XNNPACK/src/math/ |
D | expm1minus-avx-rr2-lut16-p3.c |
     77  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local
     81  …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh)));  in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
     85  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
|
D | sigmoid-avx-rr2-lut64-p2-div.c |
     81  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() local
     85  …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_lh)));  in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
     89  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_lh >> 32))), 1);  in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
|
/external/XNNPACK/src/f32-velu/ |
D | avx-rr2-lut16-p3.c.in |
    141  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  variable
    145  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));
    149  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);
    207  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);  variable
    211  …h = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lh));
    215  …l_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lh >> 32))), 1);
|
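Every match above is the same two-load gather written out by hand: _mm_extract_epi64(vidx_lo, 1) pulls the byte offsets for the upper pair of lanes into a 64-bit scalar, its low 32 bits address one entry of the xnn_table_exp2minus_k_over_* lookup table, its high 32 bits (vidx_lh >> 32) address the next, and _mm_insert_epi32 places the second load into lane 1. The generated kernels load the first entry with _mm_loadu_si32, the math/ reference implementations with _mm_cvtsi32_si128. A minimal self-contained sketch of the pattern follows, using the _mm_cvtsi32_si128 form; gather_pair_hi and table are illustrative names for this sketch, not XNNPACK identifiers.

#include <stdint.h>
#include <smmintrin.h>  // SSE4.1: _mm_extract_epi64, _mm_insert_epi32

// Gathers two 32-bit table entries addressed by the upper 64-bit lane of
// vidx_lo.  'table' stands in for xnn_table_exp2minus_k_over_16/_64, and
// vidx_lo is assumed to pack four byte offsets (index * sizeof(uint32_t)).
static inline __m128i gather_pair_hi(const uint32_t* table, __m128i vidx_lo) {
  const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);
  // Low 32 bits of vidx_lh address the first entry; it lands in lane 0.
  __m128i vl_lh = _mm_cvtsi32_si128(
      *(const int*) ((uintptr_t) table + (uint32_t) vidx_lh));
  // High 32 bits address the second entry; insert it into lane 1.
  vl_lh = _mm_insert_epi32(
      vl_lh, *(const int*) ((uintptr_t) table + (uint32_t) (vidx_lh >> 32)), 1);
  return vl_lh;
}

Compiled with SSE4.1 enabled on a 64-bit x86 target, this reproduces the scalar load-and-insert gather visible in all of the listed kernels; AVX has no integer gather instruction, so the table lookups are done one lane pair at a time.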