/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x9-minmax-sse.c | 134 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 140 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 215 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 219 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 271 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 273 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
|
D | up8x9-wasmsimd.c | 132 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 138 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 211 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 215 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 266 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 268 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
|
D | up8x9-minmax-neon.c | 125 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 129 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 186 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 188 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 232 const float32x4_t vi5x0123 = vld1q_f32(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 234 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
|
D | up8x9-minmax-sse-acc2.c | 134 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 140 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 218 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 222 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 276 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 278 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2()
|
D | up8x9-minmax-neonfma.c | 125 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 129 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 186 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 188 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 232 const float32x4_t vi5x0123 = vld1q_f32(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 234 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
|
D | up8x9-minmax-wasmsimd-arm-acc2.c | 134 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 140 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 219 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 223 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 277 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 279 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2()
|
D | up8x9-minmax-wasmsimd-x86.c | 134 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 216 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 272 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 274 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
|
D | up8x9-minmax-neonfma-acc2.c | 125 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 129 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 189 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 191 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 237 const float32x4_t vi5x0123 = vld1q_f32(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 239 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2()
|
D | up8x9-minmax-wasmsimd-arm.c | 134 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 216 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 272 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 274 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
|
D | up8x9-minmax-neon-acc2.c | 125 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 129 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 189 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 191 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 237 const float32x4_t vi5x0123 = vld1q_f32(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 239 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2()
|
D | up4x9-minmax-wasmsimd-arm.c | 118 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local 122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 175 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local 177 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
|
D | up4x9-wasmsimd.c | 116 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local 120 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 171 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local 173 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
|
D | up4x9-minmax-sse.c | 118 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() local 122 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 174 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() local 176 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
|
D | up4x9-minmax-neonfma.c | 109 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() local 111 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 155 const float32x4_t vi5x0123 = vld1q_f32(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() local 157 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
|
D | up4x9-minmax-neon.c | 109 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local 111 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 155 const float32x4_t vi5x0123 = vld1q_f32(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local 157 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
|
D | up8x9-minmax-wasmsimd-x86-acc2.c | 134 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 140 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 219 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 223 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 277 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 279 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2()
|
D | up4x9-minmax-wasmsimd-x86-acc2.c | 118 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() local 122 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 177 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() local 179 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2()
|
D | up4x9-minmax-neon-acc2.c | 109 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() local 111 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 157 const float32x4_t vi5x0123 = vld1q_f32(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() local 159 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2()
|
D | up4x9-minmax-sse-acc2.c | 118 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() local 122 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 176 const __m128 vi5x0123 = _mm_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() local 178 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2()
|
D | up4x9-minmax-wasmsimd-x86.c | 118 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() local 122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 175 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() local 177 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
|
D | up4x9-minmax-neonfma-acc2.c | 109 const float32x4_t vi5x0123 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() local 111 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 157 const float32x4_t vi5x0123 = vld1q_f32(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() local 159 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2()
|
D | up4x9-minmax-wasmsimd-arm-acc2.c | 118 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() local 122 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 177 const v128_t vi5x0123 = wasm_v128_load(i5); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() local 179 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2()
|
/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-neon-2x4-acc2.c | 75 float32x4_t vi5x0123 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local 116 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() 143 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() 144 vi5x0123 = vi5x4567; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() 262 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() 289 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() 290 vi5x0123 = vi5x4567; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() 404 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() 426 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
|
D | 5x5p2-minmax-neonfma-2x4.c | 75 float32x4_t vi5x0123 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local 116 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() 143 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() 144 vi5x0123 = vi5x4567; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() 260 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() 287 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() 288 vi5x0123 = vi5x4567; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() 400 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() 422 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
|
D | 5x5p2-minmax-neon-2x4.c | 75 float32x4_t vi5x0123 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local 116 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() 143 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() 144 vi5x0123 = vi5x4567; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() 260 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() 287 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() 288 vi5x0123 = vi5x4567; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() 400 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() 422 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
|