/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x4-wasmsimd.c | 66 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local 68 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() 114 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local 115 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() 147 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local 148 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd()
|
D | up8x4-minmax-sse-acc2.c | 68 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() local 70 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() 121 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() local 122 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() 157 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() local 158 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2()
|
D | up8x4-minmax-neon.c | 67 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x4__neon() local 69 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon() 107 const float32x4_t vk0x0123 = vld1q_f32(w + 4); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon() local 108 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon() 133 const float32x4_t vk0x0123 = vld1q_f32(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon() local 134 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon()
|
D | up8x4-minmax-wasmsimd-arm-acc2.c | 68 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() local 70 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() 122 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() local 123 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() 158 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() local 159 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2()
|
D | up8x4-minmax-neonfma.c | 67 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma() local 69 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma() 107 const float32x4_t vk0x0123 = vld1q_f32(w + 4); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma() local 108 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma() 133 const float32x4_t vk0x0123 = vld1q_f32(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma() local 134 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma()
|
D | up8x4-minmax-neon-acc2.c | 67 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() local 69 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() 110 const float32x4_t vk0x0123 = vld1q_f32(w + 4); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() local 111 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() 138 const float32x4_t vk0x0123 = vld1q_f32(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() local 139 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2()
|
D | up8x4-minmax-neonfma-acc2.c | 67 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() local 69 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() 110 const float32x4_t vk0x0123 = vld1q_f32(w + 4); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() local 111 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() 138 const float32x4_t vk0x0123 = vld1q_f32(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() local 139 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2()
|
D | up8x4-minmax-wasmsimd-arm.c | 68 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm() local 70 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm() 119 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm() local 120 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm() 153 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm() local 154 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm()
|
D | up8x4-minmax-wasmsimd-x86.c | 68 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local 70 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 119 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local 120 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 153 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local 154 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
|
D | up8x4-minmax-sse.c | 68 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse() local 70 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse() 118 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse() local 119 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse() 152 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse() local 153 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse()
|
D | up8x4-minmax-wasmsimd-x86-acc2.c | 68 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() local 70 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() 122 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() local 123 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() 158 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() local 159 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2()
|
D | up4x4-wasmsimd.c | 64 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_ukernel_up4x4__wasmsimd() local 65 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x4__wasmsimd() 97 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_ukernel_up4x4__wasmsimd() local 98 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x4__wasmsimd()
|
D | up4x4-minmax-sse.c | 66 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__sse() local 67 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__sse() 100 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__sse() local 101 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__sse()
|
D | up4x4-minmax-wasmsimd-arm.c | 66 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_arm() local 67 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_arm() 101 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_arm() local 102 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_arm()
|
D | up4x4-minmax-wasmsimd-arm-acc2.c | 66 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_arm_acc2() local 67 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_arm_acc2() 103 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_arm_acc2() local 104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_arm_acc2()
|
D | up4x4-minmax-sse-acc2.c | 66 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__sse_acc2() local 67 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__sse_acc2() 102 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__sse_acc2() local 103 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__sse_acc2()
|
D | up4x4-minmax-neon.c | 65 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x4__neon() local 66 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x4__neon() 91 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x4__neon() local 92 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x4__neon()
|
D | up4x4-minmax-neonfma-acc2.c | 65 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma_acc2() local 66 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma_acc2() 93 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma_acc2() local 94 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma_acc2()
|
D | up4x4-minmax-wasmsimd-x86.c | 66 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_x86() local 67 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_x86() 101 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_x86() local 102 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_x86()
|
D | up4x4-minmax-neonfma.c | 65 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma() local 66 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma() 91 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma() local 92 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma()
|
D | up4x4-minmax-neon-acc2.c | 65 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x4__neon_acc2() local 66 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x4__neon_acc2() 93 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x4__neon_acc2() local 94 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x4__neon_acc2()
|
D | up4x4-minmax-wasmsimd-x86-acc2.c | 66 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_x86_acc2() local 67 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_x86_acc2() 103 const v128_t vk0x0123 = wasm_v128_load(w + 4); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_x86_acc2() local 104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_x86_acc2()
|
D | up8x9-minmax-sse.c | 93 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 95 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 188 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 189 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 252 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 253 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
|
D | up8x9-wasmsimd.c | 91 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 93 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 184 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 185 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 247 const v128_t vk0x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 248 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
|
D | up8x9-minmax-neon.c | 92 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 94 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 167 const float32x4_t vk0x0123 = vld1q_f32(w + 4); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 168 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 213 const float32x4_t vk0x0123 = vld1q_f32(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 214 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
|