/external/XNNPACK/src/f32-velu/gen/

D | velu-avx-rr2-lut16-p3-x8.c (all matches in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8())
     58  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
     62  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
     66  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);
    124  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    128  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    132  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);

D | velu-avx-rr2-lut16-p3-x16.c (all matches in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16())
    173  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    177  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    181  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);
    239  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    243  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    247  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);

D | velu-avx-rr2-lut16-p3-x24.c (all matches in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24())
    222  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    226  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    230  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);
    288  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    292  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    296  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);

D | velu-avx-rr2-lut16-p3-x32.c (all matches in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32())
    271  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    275  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    279  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);
    337  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    341  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    345  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);

D | velu-avx-rr2-lut16-p3-x40.c (all matches in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40())
    320  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    324  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    328  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);
    386  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    390  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    394  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);

D | velu-avx-rr2-lut16-p3-x48.c (all matches in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48())
    369  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    373  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    377  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);
    435  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
    439  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    443  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);
/external/XNNPACK/src/math/

D | expm1minus-avx-rr2-lut16-p3.c (all matches in xnn_math_f32_expm1minus__avx_rr2_lut16_p3())
     76  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
     80  …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll)));
     84  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);

D | sigmoid-avx-rr2-lut64-p2-div.c (all matches in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div())
     80  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);  [local]
     84  …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_ll)));
     88  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_ll >> 32))), 1);
/external/XNNPACK/src/f32-velu/

D | avx-rr2-lut16-p3.c.in
    140  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
    144  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    148  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);
    206  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
    210  …l = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_ll));
    214  …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_ll >> 32))), 1);
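Every occurrence listed above follows the same two-lane scalar gather: the low 64 bits of the SSE index vector are moved into a general-purpose register with _mm_cvtsi128_si64, and the two 32-bit byte offsets packed in that value are used to read lookup-table entries into lanes 0 and 1 of an __m128i. The sketch below is a minimal, hypothetical reconstruction of that pattern, not code taken from XNNPACK; the names gather_lo_pair and table, and the zero-filled table contents, are placeholders standing in for xnn_table_exp2minus_k_over_16.

#include <immintrin.h>
#include <stdint.h>

// Hypothetical stand-in for xnn_table_exp2minus_k_over_16 (the real table holds
// 16 float bit patterns; zeros here only keep the sketch self-contained).
static const uint32_t table[16] = {0};

// Gather two 32-bit table entries whose byte offsets are packed into the low
// 64 bits of vidx (one offset per 32-bit lane), mirroring the vidx_ll lines above.
static __m128i gather_lo_pair(__m128i vidx) {
  // Move both offsets into a scalar register in one step.
  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx);
  // First entry -> lane 0 (the gen/ kernels use _mm_loadu_si32; _mm_cvtsi32_si128 of a
  // dereferenced pointer, as in the math/ reference files, is the equivalent form).
  __m128i vl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) table + (uint32_t) vidx_ll)));
  // Second entry -> lane 1, taking the offset from the upper 32 bits.
  vl = _mm_insert_epi32(vl, *((const int*) ((uintptr_t) table + (uint32_t) (vidx_ll >> 32))), 1);
  return vl;
}

In the kernels themselves, this sequence presumably runs again for the remaining index lanes before the gathered words are recombined into a full vector; the listing only captures the vidx_ll half.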