/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx-rr2-lut16-p3-x8.c | 62 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 66 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 71 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 75 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 83 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 128 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 132 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 137 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 141 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 149 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
|
D | velu-sse41-rr2-lut16-p3-x4.c | 58 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 60 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() 63 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 65 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() 101 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 103 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() 106 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 108 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
|
D | velu-sse2-rr2-lut16-p3-x4.c | 58 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 61 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 65 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 68 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 106 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 109 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 113 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 116 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
|
D | velu-avx-rr2-lut16-p3-x16.c | 177 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local 181 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 186 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local 190 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 198 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 243 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local 247 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 252 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local 256 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 264 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
|
D | velu-avx-rr2-lut16-p3-x24.c | 226 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local 230 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 235 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local 239 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 247 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 292 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local 296 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 301 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local 305 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 313 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
|
D | velu-sse41-rr2-lut16-p3-x8.c | 148 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 150 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 153 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 155 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 191 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 193 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 196 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 198 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
|
D | velu-avx-rr2-lut16-p3-x32.c | 275 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local 279 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 284 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local 288 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 296 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 341 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local 345 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 350 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local 354 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 362 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
|
D | velu-sse2-rr2-lut16-p3-x8.c | 158 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 161 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 165 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 168 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 206 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 209 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 213 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 216 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
|
D | velu-sse41-rr2-lut16-p3-x12.c | 182 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local 184 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 187 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local 189 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 225 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local 227 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 230 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local 232 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
|
D | velu-avx-rr2-lut16-p3-x40.c | 324 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local 328 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 333 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local 337 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 345 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 390 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local 394 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 399 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local 403 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 411 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
|
D | velu-sse2-rr2-lut16-p3-x12.c | 197 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local 200 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 204 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local 207 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 245 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local 248 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 252 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local 255 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
|
D | velu-sse41-rr2-lut16-p3-x16.c | 216 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local 218 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 221 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local 223 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 259 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local 261 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 264 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local 266 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
|
D | velu-avx-rr2-lut16-p3-x48.c | 373 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local 377 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 382 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local 386 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 394 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 439 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local 443 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 448 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local 452 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 460 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
|
/external/XNNPACK/src/math/ |
D | expm1minus-avx-rr2-lut16-p3.c | 80 …__m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin… in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local 84 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() 89 …__m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin… in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local 93 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() 98 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
|
D | sigmoid-avx-rr2-lut64-p2-div.c | 84 …__m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() local 88 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint3… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 93 …__m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() local 97 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint3… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 102 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-lut64-p2-div-x4.c | 51 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 53 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 56 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 58 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 93 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 95 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 98 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 100 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
|
D | sse2-lut64-p2-div-x4.c | 51 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 54 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 58 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 61 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 98 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 101 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 105 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 108 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
|
D | sse41-lut64-p2-div-x8.c | 138 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 140 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 143 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 145 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 180 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 182 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 185 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 187 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
|
D | sse2-lut64-p2-div-x8.c | 149 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 152 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 156 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 159 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 196 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 199 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 203 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 206 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
|
D | sse41-lut64-p2-div-x12.c | 170 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 172 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 175 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 177 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 212 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 214 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 217 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 219 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
|
D | sse41-lut64-p2-div-x16.c | 202 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() local 204 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 207 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() local 209 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 244 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() local 246 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 249 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() local 251 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
|
D | sse2-lut64-p2-div-x12.c | 186 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() local 189 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 193 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() local 196 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 233 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() local 236 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 240 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() local 243 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
|
/external/XNNPACK/src/f32-velu/ |
D | avx-rr2-lut16-p3.c.in | 144 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… variable 148 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… 153 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… variable 157 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… 165 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh); 210 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… variable 214 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… 219 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… variable 223 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… 231 const __m128i vl_lo = _mm_unpacklo_epi64(vl_ll, vl_lh);
|
D | sse-rr2-lut16-p3.c.in | 162 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… variable 165 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 168 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); 175 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … variable 178 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 181 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); 229 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_1… variable 232 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 235 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); 242 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … variable [all …]
|
/external/XNNPACK/src/f32-sigmoid/ |
D | sse-lut64-p2-div.c.in | 157 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… variable 160 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 163 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); 170 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … variable 173 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 176 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); 222 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_6… variable 225 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 228 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); 235 …const __m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … variable [all …]
|