/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx-rr2-lut16-p3-x8.c | 64 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 68 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 73 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 77 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 84 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 130 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 134 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 139 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 143 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 150 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
|
D | velu-sse41-rr2-lut16-p3-x4.c | 59 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 61 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() 64 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 66 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() 102 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 104 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() 107 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 109 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
|
D | velu-sse2-rr2-lut16-p3-x4.c | 59 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 63 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 66 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 70 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 107 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 111 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 114 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 118 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
|
D | velu-avx-rr2-lut16-p3-x16.c | 179 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local 183 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 188 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local 192 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 199 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 245 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local 249 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 254 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local 258 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() 265 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
|
D | velu-avx-rr2-lut16-p3-x24.c | 228 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local 232 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 237 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local 241 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 248 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 294 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local 298 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 303 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local 307 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() 314 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
|
D | velu-sse41-rr2-lut16-p3-x8.c | 149 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 151 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 154 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 156 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 192 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 194 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 197 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 199 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
|
D | velu-avx-rr2-lut16-p3-x32.c | 277 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local 281 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 286 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local 290 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 297 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 343 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local 347 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 352 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local 356 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() 363 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
|
D | velu-sse2-rr2-lut16-p3-x8.c | 159 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 163 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 166 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 170 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 207 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 211 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 214 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 218 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
|
D | velu-sse41-rr2-lut16-p3-x12.c | 183 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local 185 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 188 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local 190 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 226 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local 228 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 231 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local 233 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
|
D | velu-avx-rr2-lut16-p3-x40.c | 326 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local 330 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 335 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local 339 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 346 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 392 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local 396 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 401 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local 405 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() 412 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
|
D | velu-sse2-rr2-lut16-p3-x12.c | 198 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local 202 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 205 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local 209 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 246 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local 250 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 253 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local 257 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
|
D | velu-sse41-rr2-lut16-p3-x16.c | 217 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local 219 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 222 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local 224 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 260 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local 262 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 265 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local 267 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
|
D | velu-avx-rr2-lut16-p3-x48.c | 375 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local 379 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 384 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local 388 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 395 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 441 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local 445 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 450 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local 454 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() 461 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
|
/external/XNNPACK/src/math/ |
D | expm1minus-avx-rr2-lut16-p3.c | 82 …__m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin… in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local 86 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() 91 …__m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin… in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local 95 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() 99 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
|
D | sigmoid-avx-rr2-lut64-p2-div.c | 86 …__m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() local 90 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint3… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 95 …__m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() local 99 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint3… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 103 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-lut64-p2-div-x4.c | 52 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 54 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 57 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 59 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 94 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 96 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 99 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 101 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
|
D | sse2-lut64-p2-div-x4.c | 52 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 56 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 59 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 63 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 99 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 103 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 106 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 110 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
|
D | sse41-lut64-p2-div-x8.c | 139 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 141 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 144 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 146 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 181 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 183 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 186 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 188 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
|
D | sse2-lut64-p2-div-x8.c | 150 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 154 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 157 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 161 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 197 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 201 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 204 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 208 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
|
D | sse41-lut64-p2-div-x12.c | 171 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 173 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 176 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 178 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 213 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 215 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 218 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 220 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
|
D | sse41-lut64-p2-div-x16.c | 203 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() local 205 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 208 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() local 210 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 245 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() local 247 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 250 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() local 252 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
|
D | sse2-lut64-p2-div-x12.c | 187 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() local 191 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 194 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() local 198 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 234 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() local 238 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 241 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() local 245 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
|
/external/XNNPACK/src/f32-velu/ |
D | avx-rr2-lut16-p3.c.in | 146 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… variable 150 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… 155 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… variable 159 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… 166 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh); 212 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… variable 216 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… 221 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… variable 225 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3… 232 const __m128i vl_hi = _mm_unpacklo_epi64(vl_hl, vl_hh);
|
D | sse-rr2-lut16-p3.c.in | 163 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … variable 170 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 173 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); 176 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … variable 183 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 186 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); 230 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … variable 237 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 240 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); 243 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 … variable [all …]
|
/external/XNNPACK/src/f32-sigmoid/ |
D | sse-lut64-p2-div.c.in | 158 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … variable 165 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 168 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); 171 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … variable 178 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 181 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); 223 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … variable 230 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… 233 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); 236 …const __m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 … variable [all …]
|