Home
last modified time | relevance | path

Searched refs:vi2x2 (Results 1 – 25 of 58) sorted by relevance

123

/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-wasmsimd-2x2.c143 const v128_t vi2x2 = wasm_v128_load(i2); i2 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
150 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk00c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
163 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk20c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
171 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk00c2, wasm_v32x4_shuffle(vi2x2, vi2x2, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
184 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk20c2, wasm_v32x4_shuffle(vi2x2, vi2x2, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
192 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi2x2, vi2x2, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
205 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk21c0, wasm_v32x4_shuffle(vi2x2, vi2x2, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
213 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk01c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
226 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk21c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
281 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk02c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
[all …]
D3x3s2p1c3x4-sse-2x2.c142 const __m128 vi2x2 = _mm_loadu_ps(i2); i2 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
149 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk00c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
162 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk20c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
170 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk00c2, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
183 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk20c2, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
191 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
204 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk21c0, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
212 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk01c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
225 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk21c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
280 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk02c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
[all …]
D3x3s2p1c3x4-neonfma-2x2.c142 const float32x4_t vi2x2 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
149 vo1x1 = vfmaq_laneq_f32(vo1x1, vk00c1, vi2x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
162 vo0x1 = vfmaq_laneq_f32(vo0x1, vk20c1, vi2x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
170 vo1x1 = vfmaq_laneq_f32(vo1x1, vk00c2, vi2x2, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
183 vo0x1 = vfmaq_laneq_f32(vo0x1, vk20c2, vi2x2, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
191 vo1x1 = vfmaq_laneq_f32(vo1x1, vk01c0, vi2x2, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
204 vo0x1 = vfmaq_laneq_f32(vo0x1, vk21c0, vi2x2, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
212 vo1x1 = vfmaq_laneq_f32(vo1x1, vk01c1, vi2x2, 3); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
225 vo0x1 = vfmaq_laneq_f32(vo0x1, vk21c1, vi2x2, 3); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
280 vo1x0 = vfmaq_laneq_f32(vo1x0, vk02c1, vi2x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
[all …]
D3x3s2p1c3x4-neon-2x2.c142 const float32x4_t vi2x2 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
149 vo1x1 = vmlaq_lane_f32(vo1x1, vk00c1, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
162 vo0x1 = vmlaq_lane_f32(vo0x1, vk20c1, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
170 vo1x1 = vmlaq_lane_f32(vo1x1, vk00c2, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
183 vo0x1 = vmlaq_lane_f32(vo0x1, vk20c2, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
191 vo1x1 = vmlaq_lane_f32(vo1x1, vk01c0, vget_high_f32(vi2x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
204 vo0x1 = vmlaq_lane_f32(vo0x1, vk21c0, vget_high_f32(vi2x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
212 vo1x1 = vmlaq_lane_f32(vo1x1, vk01c1, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
225 vo0x1 = vmlaq_lane_f32(vo0x1, vk21c1, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
280 vo1x0 = vmlaq_lane_f32(vo1x0, vk02c1, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
[all …]
/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-2x1.c100 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
153 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
161 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
162 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
171 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
248 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
256 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
257 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
266 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
321 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
[all …]
D5x5p2-minmax-scalar-3x1.c108 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
175 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
183 vo2p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
185 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
187 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
199 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
302 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
310 vo2p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
312 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
314 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
[all …]
D5x5p2-minmax-scalar-1x1-acc4.c92 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
131 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
137 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
143 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
197 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
203 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
209 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
247 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
D5x5p2-minmax-scalar-1x1-acc2.c92 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
131 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
137 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
143 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
195 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
201 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
207 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
243 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
D5x5p2-minmax-scalar-1x1-acc3.c92 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
131 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
137 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
143 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
196 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
202 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
208 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
245 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1.c92 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
131 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
137 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
143 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
194 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
200 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
206 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
241 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
D5x5p2-minmax-scalar-3x1-acc2.c108 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
175 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
183 vo2p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
185 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
187 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
199 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
305 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
313 vo2p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
315 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
317 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
[all …]
D5x5p2-minmax-scalar-2x1-acc3.c100 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
153 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
161 vo1p2 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
162 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
171 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
252 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
260 vo1p2 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
261 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
270 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
329 vo1p2 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
[all …]
D5x5p2-minmax-scalar-2x1-acc2.c100 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
153 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
161 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
162 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
171 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
250 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
258 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
259 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
268 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
325 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
[all …]
D5x5p2-minmax-scalar-1x1-acc5.c92 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
131 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
137 vo0p2 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
143 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
198 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
204 vo0p2 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
210 vi2x2 = vi2x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
249 vo0p2 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
D5x5s2p2-minmax-scalar-2x1.c115 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
159 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
185 vo1p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
188 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
197 vi2x2 = vi2x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
267 vo1p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
270 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
321 vo1p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
324 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc2.c115 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
159 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
185 vo1p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
188 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
197 vi2x2 = vi2x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
269 vo1p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
272 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
325 vo1p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
328 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1-acc3.c115 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
159 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
185 vo1p1 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
188 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
197 vi2x2 = vi2x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
271 vo1p1 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
274 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
329 vo1p1 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
332 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D5x5s2p2-minmax-scalar-1x1-acc4.c100 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
131 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
149 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
155 vi2x2 = vi2x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
202 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
236 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1.c100 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
131 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
149 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
155 vi2x2 = vi2x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
199 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
230 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
D5x5s2p2-minmax-scalar-1x1-acc2.c100 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
131 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
149 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
155 vi2x2 = vi2x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
200 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
232 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
D5x5s2p2-minmax-scalar-1x1-acc3.c100 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
131 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
149 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
155 vi2x2 = vi2x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
201 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
234 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
D5x5s2p2-minmax-scalar-1x1-acc5.c100 float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
131 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
149 vo0p2 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
155 vi2x2 = vi2x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
203 vo0p2 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
238 vo0p2 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
D3x3p1-minmax-scalar-3x1.c85 const float vi2x2 = *i2++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
117 vi2x1 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
123 vo2p0 += vi2x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
125 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
127 vo0p0 += vi2x2 * vk22; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
/external/XNNPACK/src/f32-conv-hwc/gen/
D3x3s2p1c3x8-neonfma-2x2.c154 const float32x4_t vi2x2 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
164 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
166 vo1x1c4567 = vfmaq_lane_f32(vo1x1c4567, vk00c1x4567, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
189 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
191 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c1x4567, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
203 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
205 vo1x1c4567 = vfmaq_lane_f32(vo1x1c4567, vk00c2x4567, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
228 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
230 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c2x4567, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
242 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk01c0x0123, vget_high_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
[all …]
D3x3s2p1c3x8-neon-2x2.c152 const float32x4_t vi2x2 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local
162 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
164 vo1x1c4567 = vmlaq_lane_f32(vo1x1c4567, vk00c1x4567, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
187 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
189 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c1x4567, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
201 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
203 vo1x1c4567 = vmlaq_lane_f32(vo1x1c4567, vk00c2x4567, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
226 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
228 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c2x4567, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
240 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk01c0x0123, vget_high_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
[all …]

123