/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-wasmsimd-2x2.c |
    143  const v128_t vi2x2 = wasm_v128_load(i2); i2 += 4;  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
    150  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk00c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 0, 0, 0, 0))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
    163  …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk20c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 0, 0, 0, 0))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
    171  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk00c2, wasm_v32x4_shuffle(vi2x2, vi2x2, 1, 1, 1, 1))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
    184  …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk20c2, wasm_v32x4_shuffle(vi2x2, vi2x2, 1, 1, 1, 1))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
    192  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi2x2, vi2x2, 2, 2, 2, 2))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
    205  …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk21c0, wasm_v32x4_shuffle(vi2x2, vi2x2, 2, 2, 2, 2))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
    213  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk01c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 3, 3, 3, 3))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
    226  …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk21c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 3, 3, 3, 3))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
    281  …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk02c1, wasm_v32x4_shuffle(vi2x2, vi2x2, 0, 0, 0, 0))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
    [all …]
|
D | 3x3s2p1c3x4-sse-2x2.c |
    142  const __m128 vi2x2 = _mm_loadu_ps(i2); i2 += 4;  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
    149  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk00c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(0, 0, 0, 0))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
    162  …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk20c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(0, 0, 0, 0))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
    170  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk00c2, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(1, 1, 1, 1))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
    183  …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk20c2, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(1, 1, 1, 1))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
    191  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(2, 2, 2, 2))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
    204  …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk21c0, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(2, 2, 2, 2))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
    212  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk01c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(3, 3, 3, 3))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
    225  …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk21c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(3, 3, 3, 3))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
    280  …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk02c1, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(0, 0, 0, 0))…  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
    [all …]
|
D | 3x3s2p1c3x4-neonfma-2x2.c |
    142  const float32x4_t vi2x2 = vld1q_f32(i2); i2 += 4;  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
    149  vo1x1 = vfmaq_laneq_f32(vo1x1, vk00c1, vi2x2, 0);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
    162  vo0x1 = vfmaq_laneq_f32(vo0x1, vk20c1, vi2x2, 0);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
    170  vo1x1 = vfmaq_laneq_f32(vo1x1, vk00c2, vi2x2, 1);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
    183  vo0x1 = vfmaq_laneq_f32(vo0x1, vk20c2, vi2x2, 1);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
    191  vo1x1 = vfmaq_laneq_f32(vo1x1, vk01c0, vi2x2, 2);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
    204  vo0x1 = vfmaq_laneq_f32(vo0x1, vk21c0, vi2x2, 2);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
    212  vo1x1 = vfmaq_laneq_f32(vo1x1, vk01c1, vi2x2, 3);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
    225  vo0x1 = vfmaq_laneq_f32(vo0x1, vk21c1, vi2x2, 3);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
    280  vo1x0 = vfmaq_laneq_f32(vo1x0, vk02c1, vi2x2, 0);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
    [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c |
    142  const float32x4_t vi2x2 = vld1q_f32(i2); i2 += 4;  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
    149  vo1x1 = vmlaq_lane_f32(vo1x1, vk00c1, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
    162  vo0x1 = vmlaq_lane_f32(vo0x1, vk20c1, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
    170  vo1x1 = vmlaq_lane_f32(vo1x1, vk00c2, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
    183  vo0x1 = vmlaq_lane_f32(vo0x1, vk20c2, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
    191  vo1x1 = vmlaq_lane_f32(vo1x1, vk01c0, vget_high_f32(vi2x2), 0);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
    204  vo0x1 = vmlaq_lane_f32(vo0x1, vk21c0, vget_high_f32(vi2x2), 0);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
    212  vo1x1 = vmlaq_lane_f32(vo1x1, vk01c1, vget_high_f32(vi2x2), 1);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
    225  vo0x1 = vmlaq_lane_f32(vo0x1, vk21c1, vget_high_f32(vi2x2), 1);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
    280  vo1x0 = vmlaq_lane_f32(vo1x0, vk02c1, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
    [all …]
|
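The four 3x3s2p1c3x4 variants above differ only in how they spell the same inner step: broadcast one lane of the freshly loaded input vector vi2x2 and multiply-accumulate it against a 4-wide kernel column. wasm_v32x4_shuffle plus wasm_f32x4_mul/wasm_f32x4_add, _mm_shuffle_ps plus _mm_mul_ps/_mm_add_ps, vfmaq_laneq_f32, and vmlaq_lane_f32 with vget_low_f32/vget_high_f32 are four spellings of that one operation. The scalar sketch below is not XNNPACK source; it only illustrates, with names borrowed from the listing, what a single lane-broadcast update computes.

/*
 * Illustrative scalar sketch (not XNNPACK source): the effect of one
 * "broadcast lane j of vi2x2, multiply by a 4-wide kernel column,
 * accumulate into a 4-wide output register" step.
 */
#include <stdio.h>

/* acc[c] += k[c] * v[lane] for the four output channels c = 0..3 */
static void madd_broadcast_lane(float acc[4], const float k[4],
                                const float v[4], int lane) {
  const float x = v[lane];  /* broadcast one input lane */
  for (int c = 0; c < 4; c++) {
    acc[c] += k[c] * x;     /* multiply-accumulate per output channel */
  }
}

int main(void) {
  float vo1x1[4] = {0.0f, 0.0f, 0.0f, 0.0f};         /* one output pixel, 4 channels */
  const float vk00c1[4] = {0.1f, 0.2f, 0.3f, 0.4f};  /* one kernel column (example values) */
  const float vi2x2[4] = {1.0f, 2.0f, 3.0f, 4.0f};   /* four consecutive input floats */

  /* Mirrors the updates at e.g. line 150 (wasmsimd) or 149 (sse/neonfma/neon):
     lane 0 of vi2x2 feeds kernel column vk00c1 into accumulator vo1x1. */
  madd_broadcast_lane(vo1x1, vk00c1, vi2x2, 0);

  printf("%g %g %g %g\n", vo1x1[0], vo1x1[1], vo1x1[2], vo1x1[3]);
  return 0;
}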
/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-scalar-2x1.c |
    100  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
    153  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
    161  vo1p0 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
    162  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
    171  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
    248  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
    256  vo1p0 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
    257  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
    266  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
    321  vo1p0 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
    [all …]
|
D | 5x5p2-minmax-scalar-3x1.c |
    108  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
    175  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
    183  vo2p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
    185  vo1p0 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
    187  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
    199  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
    302  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
    310  vo2p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
    312  vo1p0 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
    314  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
    [all …]
|
D | 5x5p2-minmax-scalar-1x1-acc4.c |
    92   float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
    131  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
    137  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
    143  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
    197  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
    203  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
    209  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
    247  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
|
D | 5x5p2-minmax-scalar-1x1-acc2.c |
    92   float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
    131  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
    137  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
    143  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
    195  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
    201  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
    207  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
    243  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
|
D | 5x5p2-minmax-scalar-1x1-acc3.c |
    92   float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
    131  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
    137  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
    143  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
    196  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
    202  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
    208  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
    245  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-scalar-1x1.c |
    92   float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
    131  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
    137  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
    143  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
    194  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
    200  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
    206  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
    241  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
|
D | 5x5p2-minmax-scalar-3x1-acc2.c |
    108  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
    175  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
    183  vo2p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
    185  vo1p1 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
    187  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
    199  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
    305  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
    313  vo2p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
    315  vo1p1 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
    317  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
    [all …]
|
D | 5x5p2-minmax-scalar-2x1-acc3.c |
    100  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
    153  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
    161  vo1p2 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
    162  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
    171  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
    252  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
    260  vo1p2 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
    261  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
    270  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
    329  vo1p2 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
    [all …]
|
D | 5x5p2-minmax-scalar-2x1-acc2.c |
    100  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
    153  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
    161  vo1p1 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
    162  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
    171  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
    250  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
    258  vo1p1 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
    259  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
    268  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
    325  vo1p1 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
    [all …]
|
D | 5x5p2-minmax-scalar-1x1-acc5.c |
    92   float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
    131  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
    137  vo0p2 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
    143  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
    198  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
    204  vo0p2 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
    210  vi2x2 = vi2x3;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
    249  vo0p2 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
|
D | 5x5s2p2-minmax-scalar-2x1.c |
    115  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
    159  vi2x0 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
    185  vo1p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
    188  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
    197  vi2x2 = vi2x4;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
    267  vo1p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
    270  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
    321  vo1p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
    324  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
|
D | 5x5s2p2-minmax-scalar-2x1-acc2.c |
    115  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
    159  vi2x0 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
    185  vo1p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
    188  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
    197  vi2x2 = vi2x4;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
    269  vo1p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
    272  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
    325  vo1p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
    328  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c |
    115  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
    159  vi2x0 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
    185  vo1p1 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
    188  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
    197  vi2x2 = vi2x4;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
    271  vo1p1 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
    274  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
    329  vo1p1 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
    332  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
|
D | 5x5s2p2-minmax-scalar-1x1-acc4.c |
    100  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
    131  vi2x0 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
    149  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
    155  vi2x2 = vi2x4;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
    202  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
    236  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
|
D | 5x5s2p2-minmax-scalar-1x1.c |
    100  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
    131  vi2x0 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
    149  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
    155  vi2x2 = vi2x4;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
    199  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
    230  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
|
D | 5x5s2p2-minmax-scalar-1x1-acc2.c |
    100  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
    131  vi2x0 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
    149  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
    155  vi2x2 = vi2x4;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
    200  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
    232  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
|
D | 5x5s2p2-minmax-scalar-1x1-acc3.c |
    100  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
    131  vi2x0 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
    149  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
    155  vi2x2 = vi2x4;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
    201  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
    234  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
|
D | 5x5s2p2-minmax-scalar-1x1-acc5.c |
    100  float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
    131  vi2x0 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
    149  vo0p2 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
    155  vi2x2 = vi2x4;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
    203  vo0p2 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
    238  vo0p2 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
|
D | 3x3p1-minmax-scalar-3x1.c |
    85   const float vi2x2 = *i2++;  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
    117  vi2x1 = vi2x2;  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
    123  vo2p0 += vi2x2 * vk02;  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
    125  vo1p0 += vi2x2 * vk12;  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
    127  vo0p0 += vi2x2 * vk22;  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
|
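The 5x5p2, 5x5s2p2, and 3x3p1 scalar variants above share a sliding-window idiom: the current input columns of each row sit in scalar locals, their products with the kernel taps (vk02, vk12, vk22, ...) are accumulated into one or more partial sums (the accN suffix names the number of accumulators per output), and the window is rotated at the end of each column step (vi2x1 = vi2x2; vi2x2 = vi2x3; or, for stride 2, vi2x0 = vi2x2; vi2x2 = vi2x4;). The sketch below is not XNNPACK source; it is a minimal one-row, 5-tap, padding-2 illustration of the rotation and split-accumulator pattern, with all names invented for the example.

/*
 * Hedged sketch (not XNNPACK source) of the register-rotation pattern
 * visible in the 5x5p2 scalar kernels: the five most recent input values
 * of a row are held in locals, shifted down each iteration, and two
 * partial accumulators (the "acc2" idea) are summed at the end to
 * shorten the dependency chain.
 */
#include <stdio.h>

/* 1-D, padding-2, 5-tap convolution over one row of length w (w >= 1). */
static void conv5_row(const float* x, float* y, int w, const float k[5]) {
  /* Window x[i-2 .. i+2] for output i, with zero padding on both sides. */
  float v0 = 0.0f, v1 = 0.0f, v2 = x[0];
  float v3 = (w > 1) ? x[1] : 0.0f;
  for (int i = 0; i < w; i++) {
    const float v4 = (i + 2 < w) ? x[i + 2] : 0.0f;
    float p0 = v0 * k[0] + v1 * k[1];   /* first partial accumulator */
    float p1 = v2 * k[2] + v3 * k[3];   /* second partial accumulator */
    p1 += v4 * k[4];
    y[i] = p0 + p1;                     /* combine partial sums */
    /* Rotate the window, mirroring "vi2x1 = vi2x2; vi2x2 = vi2x3;". */
    v0 = v1; v1 = v2; v2 = v3; v3 = v4;
  }
}

int main(void) {
  const float x[6] = {1, 2, 3, 4, 5, 6};
  const float k[5] = {1, 1, 1, 1, 1};
  float y[6];
  conv5_row(x, y, 6, k);
  for (int i = 0; i < 6; i++) printf("%g ", y[i]);  /* prints: 6 10 15 20 18 15 */
  printf("\n");
  return 0;
}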
/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p1c3x8-neonfma-2x2.c |
    154  const float32x4_t vi2x2 = vld1q_f32(i2); i2 += 4;  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
    164  vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
    166  vo1x1c4567 = vfmaq_lane_f32(vo1x1c4567, vk00c1x4567, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
    189  vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
    191  vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c1x4567, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
    203  vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
    205  vo1x1c4567 = vfmaq_lane_f32(vo1x1c4567, vk00c2x4567, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
    228  vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
    230  vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c2x4567, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
    242  vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk01c0x0123, vget_high_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
    [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c |
    152  const float32x4_t vi2x2 = vld1q_f32(i2); i2 += 4;  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local
    162  vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
    164  vo1x1c4567 = vmlaq_lane_f32(vo1x1c4567, vk00c1x4567, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
    187  vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
    189  vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c1x4567, vget_low_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
    201  vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
    203  vo1x1c4567 = vmlaq_lane_f32(vo1x1c4567, vk00c2x4567, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
    226  vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
    228  vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c2x4567, vget_low_f32(vi2x2), 1);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
    240  vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk01c0x0123, vget_high_f32(vi2x2), 0);  in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
    [all …]
|
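The f32-conv-hwc 3x3s2p1c3x8 kernels above apply the same lane-broadcast step as the hwc2chw kernels, but with eight output channels per tile, so every accumulator and kernel column is split into a low half (…c0123) and a high half (…c4567) that are updated back to back from the same broadcast input lane. Below is a hedged scalar sketch of that paired update, with names borrowed from the listing rather than taken from the actual source.

/*
 * Hedged sketch (not XNNPACK source) of the c3x8 accumulator split seen
 * in the 3x3s2p1c3x8 conv-hwc kernels: one broadcast input lane updates
 * two 4-wide accumulator halves (...c0123 and ...c4567) covering
 * eight output channels.
 */
#include <stdio.h>

static void madd_broadcast_8ch(float acc_c0123[4], float acc_c4567[4],
                               const float k_c0123[4], const float k_c4567[4],
                               float x) {
  for (int c = 0; c < 4; c++) {
    acc_c0123[c] += k_c0123[c] * x;  /* output channels 0..3 */
    acc_c4567[c] += k_c4567[c] * x;  /* output channels 4..7 */
  }
}

int main(void) {
  float vo1x1c0123[4] = {0}, vo1x1c4567[4] = {0};
  const float vk00c1x0123[4] = {1, 2, 3, 4};       /* example kernel values */
  const float vk00c1x4567[4] = {5, 6, 7, 8};
  const float x = 0.5f;  /* one broadcast lane of an input vector such as vi2x2 */

  /* Mirrors the paired updates at e.g. lines 164/166 of the neonfma variant. */
  madd_broadcast_8ch(vo1x1c0123, vo1x1c4567, vk00c1x0123, vk00c1x4567, x);

  for (int c = 0; c < 4; c++) printf("%g ", vo1x1c0123[c]);
  for (int c = 0; c < 4; c++) printf("%g ", vo1x1c4567[c]);
  printf("\n");
  return 0;
}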