/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-p5-div-x8.c | 77 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() local 80 __m128 vd4567 = _mm_add_ps(ve4567, vone); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 83 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
|
D | neonfma-rr1-p5-div-x8.c | 72 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() local 75 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 78 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
|
D | wasmsimd-p5-div-x8.c | 77 const v128_t ve4567 = wasm_f32x4_add(vs4567, wasm_f32x4_mul(vt4567, vp4567)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x8() local 80 const v128_t vd4567 = wasm_f32x4_add(ve4567, vone); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x8() 83 v128_t vf4567 = wasm_f32x4_div(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x8()
|
D | neonfma-rr1-p5-nr2recps-x8.c | 72 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local 75 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 87 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
D | sse2-p5-div-x8.c | 77 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() local 80 __m128 vd4567 = _mm_add_ps(ve4567, vone); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 83 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
|
D | neonfma-rr1-p5-nr1recps1fma-x8.c | 72 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() local 75 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 87 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
|
D | neon-rr2-p5-nr2recps-x8.c | 76 const float32x4_t ve4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() local 79 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 91 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
|
D | neonfma-rr1-p5-nr2fma-x8.c | 72 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() local 75 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 87 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
|
D | neonfma-rr1-p5-div-x12.c | 83 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 87 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 91 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | sse41-p5-div-x12.c | 89 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local 93 __m128 vd4567 = _mm_add_ps(ve4567, vone); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 97 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
D | wasmsimd-p5-div-x12.c | 89 const v128_t ve4567 = wasm_f32x4_add(vs4567, wasm_f32x4_mul(vt4567, vp4567)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() local 93 const v128_t vd4567 = wasm_f32x4_add(ve4567, vone); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() 97 v128_t vf4567 = wasm_f32x4_div(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12()
|
D | neonfma-rr1-p5-nr1recps1fma-x12.c | 83 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() local 87 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 103 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
|
D | wasmsimd-p5-div-x16.c | 101 const v128_t ve4567 = wasm_f32x4_add(vs4567, wasm_f32x4_mul(vt4567, vp4567)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() local 106 const v128_t vd4567 = wasm_f32x4_add(ve4567, vone); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 111 v128_t vf4567 = wasm_f32x4_div(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 83 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 87 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 103 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 83 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 87 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 103 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | sse2-p5-div-x12.c | 89 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() local 93 __m128 vd4567 = _mm_add_ps(ve4567, vone); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 97 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
|
D | neon-rr2-p5-nr2recps-x12.c | 88 const float32x4_t ve4567 = vmlaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 92 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 108 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | neonfma-rr1-p5-div-x16.c | 94 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local 99 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 104 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | sse41-p5-div-x16.c | 101 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() local 106 __m128 vd4567 = _mm_add_ps(ve4567, vone); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 111 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
|
D | wasmsimd-p5-div-x20.c | 113 const v128_t ve4567 = wasm_f32x4_add(vs4567, wasm_f32x4_mul(vt4567, vp4567)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() local 119 const v128_t vd4567 = wasm_f32x4_add(ve4567, vone); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 125 v128_t vf4567 = wasm_f32x4_div(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
|
D | neonfma-rr1-p5-nr1recps1fma-x16.c | 94 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() local 99 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() 119 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16()
|
D | neonfma-rr1-p5-nr2recps-x16.c | 94 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local 99 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 119 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
|
D | neonfma-rr1-p5-div-x20.c | 105 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() local 111 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 117 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
|
D | sse41-p5-div-x20.c | 113 __m128 ve4567 = _mm_add_ps(_mm_mul_ps(vt4567, vp4567), vs4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() local 119 __m128 vd4567 = _mm_add_ps(ve4567, vone); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 125 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
|
D | neonfma-rr1-p5-nr2fma-x16.c | 94 const float32x4_t ve4567 = vfmaq_f32(vs4567, vp4567, vt4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local 99 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 119 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
|