/external/XNNPACK/src/f32-sigmoid/gen/ |
D | wasmsimd-p5-div-x16.c | 103 const v128_t veCDEF = wasm_f32x4_add(vsCDEF, wasm_f32x4_mul(vtCDEF, vpCDEF)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() local 108 const v128_t vdCDEF = wasm_f32x4_add(veCDEF, vone); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 113 v128_t vfCDEF = wasm_f32x4_div(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
|
D | neonfma-rr1-p5-div-x16.c | 96 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local 101 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 106 float32x4_t vfCDEF = vdivq_f32(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | sse41-p5-div-x16.c | 103 __m128 veCDEF = _mm_add_ps(_mm_mul_ps(vtCDEF, vpCDEF), vsCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() local 108 __m128 vdCDEF = _mm_add_ps(veCDEF, vone); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 113 __m128 vfCDEF = _mm_div_ps(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
|
D | wasmsimd-p5-div-x20.c | 115 const v128_t veCDEF = wasm_f32x4_add(vsCDEF, wasm_f32x4_mul(vtCDEF, vpCDEF)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() local 121 const v128_t vdCDEF = wasm_f32x4_add(veCDEF, vone); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 127 v128_t vfCDEF = wasm_f32x4_div(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
|
D | neonfma-rr1-p5-nr1recps1fma-x16.c | 96 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() local 101 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() 121 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16()
|
D | neonfma-rr1-p5-nr2recps-x16.c | 96 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local 101 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 121 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
|
D | neonfma-rr1-p5-div-x20.c | 107 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() local 113 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 119 float32x4_t vfCDEF = vdivq_f32(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
|
D | sse41-p5-div-x20.c | 115 __m128 veCDEF = _mm_add_ps(_mm_mul_ps(vtCDEF, vpCDEF), vsCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() local 121 __m128 vdCDEF = _mm_add_ps(veCDEF, vone); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 127 __m128 vfCDEF = _mm_div_ps(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
|
D | neonfma-rr1-p5-nr2fma-x16.c | 96 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local 101 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 121 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
|
D | sse2-p5-div-x16.c | 103 __m128 veCDEF = _mm_add_ps(_mm_mul_ps(vtCDEF, vpCDEF), vsCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() local 108 __m128 vdCDEF = _mm_add_ps(veCDEF, vone); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 113 __m128 vfCDEF = _mm_div_ps(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
|
D | neon-rr2-p5-nr2recps-x16.c | 102 const float32x4_t veCDEF = vmlaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() local 107 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 127 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
|
D | sse2-p5-div-x20.c | 115 __m128 veCDEF = _mm_add_ps(_mm_mul_ps(vtCDEF, vpCDEF), vsCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() local 121 __m128 vdCDEF = _mm_add_ps(veCDEF, vone); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 127 __m128 vfCDEF = _mm_div_ps(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
|
D | neonfma-rr1-p5-div-x24.c | 118 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() local 125 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 132 float32x4_t vfCDEF = vdivq_f32(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24()
|
D | neonfma-rr1-p5-nr1recps1fma-x20.c | 107 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() local 113 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() 137 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20()
|
D | neonfma-rr1-p5-nr2recps-x20.c | 107 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() local 113 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 137 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20()
|
D | wasmsimd-p5-div-x24.c | 127 const v128_t veCDEF = wasm_f32x4_add(vsCDEF, wasm_f32x4_mul(vtCDEF, vpCDEF)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() local 134 const v128_t vdCDEF = wasm_f32x4_add(veCDEF, vone); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 141 v128_t vfCDEF = wasm_f32x4_div(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24()
|
D | sse41-p5-div-x24.c | 127 __m128 veCDEF = _mm_add_ps(_mm_mul_ps(vtCDEF, vpCDEF), vsCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() local 134 __m128 vdCDEF = _mm_add_ps(veCDEF, vone); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 141 __m128 vfCDEF = _mm_div_ps(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
|
D | neon-rr2-p5-nr2recps-x20.c | 114 const float32x4_t veCDEF = vmlaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() local 120 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 144 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
|
D | neonfma-rr1-p5-nr2fma-x20.c | 107 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() local 113 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() 137 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20()
|
D | neonfma-rr1-p5-nr1recps1fma-x24.c | 118 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() local 125 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() 153 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24()
|
D | sse2-p5-div-x24.c | 127 __m128 veCDEF = _mm_add_ps(_mm_mul_ps(vtCDEF, vpCDEF), vsCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() local 134 __m128 vdCDEF = _mm_add_ps(veCDEF, vone); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 141 __m128 vfCDEF = _mm_div_ps(veCDEF, vdCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
|
D | neonfma-rr1-p5-nr2recps-x24.c | 118 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() local 125 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 153 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24()
|
D | neonfma-rr1-p5-nr2fma-x24.c | 118 const float32x4_t veCDEF = vfmaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24() local 125 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24() 153 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24()
|
D | neon-rr2-p5-nr2recps-x24.c | 126 const float32x4_t veCDEF = vmlaq_f32(vsCDEF, vpCDEF, vtCDEF); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() local 133 const float32x4_t vdCDEF = vaddq_f32(veCDEF, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 161 float32x4_t vfCDEF = vmulq_f32(veCDEF, vrCDEF); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-sse41-rr2-p6-x16.c | 124 const __m128 veCDEF = _mm_mul_ps(_mm_add_ps(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() local 134 const __m128 vyCDEF = _mm_blendv_ps(vxCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
|