/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr1rsqrts1fma1adj-x24.c | 110 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local 117 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
|
D | neonfma-nr2fma1adj-x24.c | 109 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local 116 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 121 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 129 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr2fma1adj-x28.c | 120 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 128 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 132 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 141 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr2fma1adj-x32.c | 131 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 140 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 143 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 153 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr2fma1adj-x36.c | 142 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 152 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr2fma1adj-x40.c | 153 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 164 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 154 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 165 vst1q_f32(y, vyKLMN); y += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-xop-mul32-ld32-x24.c | 52 const __m128i vyKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 20)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() local 68 vaccKLMN = _mm_macc_epi32(vyKLMN, vy_multiplier, vaccKLMN); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
|
D | minmax-sse41-mul32-ld32-x24.c | 47 const __m128i vyKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 20)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() local 63 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vyKLMN, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
|
D | minmax-xop-mul32-ld32-x32.c | 52 const __m128i vyKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 20)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() local 74 vaccKLMN = _mm_macc_epi32(vyKLMN, vy_multiplier, vaccKLMN); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
|
D | minmax-sse41-mul32-ld32-x32.c | 47 const __m128i vyKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 20)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() local 69 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_mullo_epi32(vyKLMN, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | wasmsimd-lut64-p2-div-x24.c | 164 const v128_t vyKLMN = wasm_f32x4_sub(vsKLMN, wasm_f32x4_mul(vsKLMN, vpKLMN)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() local 171 const v128_t vdKLMN = wasm_f32x4_add(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() 178 v128_t vfKLMN = wasm_f32x4_div(vyKLMN, vdKLMN); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24()
|
D | neonfma-rr1-lut64-p2-div-x24.c | 157 const float32x4_t vyKLMN = vfmsq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() local 164 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 171 float32x4_t vfKLMN = vdivq_f32(vyKLMN, vdKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24()
|
D | neonfma-rr1-lut2048-p1-div-x24.c | 149 const float32x4_t vyKLMN = vfmaq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() local 156 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 163 float32x4_t vfKLMN = vdivq_f32(vyKLMN, vdKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x24.c | 149 const float32x4_t vyKLMN = vfmaq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() local 156 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 184 float32x4_t vfKLMN = vmulq_f32(vyKLMN, vrKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
|
D | neonfma-rr1-lut64-p2-nr2recps-x24.c | 157 const float32x4_t vyKLMN = vfmsq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() local 164 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 192 float32x4_t vfKLMN = vmulq_f32(vyKLMN, vrKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24()
|
D | neonfma-rr1-lut64-p2-nr2fma-x24.c | 157 const float32x4_t vyKLMN = vfmsq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() local 164 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 192 float32x4_t vfKLMN = vmulq_f32(vyKLMN, vrKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c | 149 const float32x4_t vyKLMN = vfmaq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() local 156 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 184 float32x4_t vfKLMN = vmulq_f32(vyKLMN, vrKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
|
D | neon-rr2-lut64-p2-nr2recps-x24.c | 165 const float32x4_t vyKLMN = vmlsq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() local 172 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 200 float32x4_t vfKLMN = vmulq_f32(vyKLMN, vrKLMN); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24()
|
D | neon-rr2-lut2048-p1-nr2recps-x24.c | 157 const float32x4_t vyKLMN = vmlaq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() local 164 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 192 float32x4_t vfKLMN = vmulq_f32(vyKLMN, vrKLMN); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x24.c | 149 const float32x4_t vyKLMN = vfmaq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() local 156 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 184 float32x4_t vfKLMN = vmulq_f32(vyKLMN, vrKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x24.c | 157 const float32x4_t vyKLMN = vfmsq_f32(vsKLMN, vsKLMN, vpKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() local 164 const float32x4_t vdKLMN = vaddq_f32(vyKLMN, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 192 float32x4_t vfKLMN = vmulq_f32(vyKLMN, vrKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24()
|