/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-neonfma-1x4.c | 157 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4() local 159 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4() 161 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4() 164 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4()
|
D | 3x3p1-minmax-neonfma-1x4-acc2.c | 159 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2() local 161 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2() 163 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2() 166 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2()
|
D | 3x3p1-minmax-neon-1x4-acc3.c | 161 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3() local 163 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3() 165 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3() 168 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3()
|
D | 3x3p1-minmax-neonfma-1x4-acc3.c | 161 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3() local 163 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3() 165 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3() 168 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3()
|
D | 3x3p1-minmax-neon-1x4.c | 157 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4() local 159 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4() 161 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4() 164 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4()
|
D | 3x3p1-minmax-neon-1x4-acc2.c | 159 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2() local 161 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2() 163 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2() 166 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2()
|
D | 3x3p1-minmax-neon-1x4-acc4.c | 163 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4() local 165 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4() 167 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4() 170 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4()
|
D | 3x3s2p1-minmax-neonfma-1x4.c | 152 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local 154 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 156 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 159 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc2.c | 154 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local 156 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 158 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 161 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc4.c | 158 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local 160 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 162 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 165 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
|
D | 3x3s2p1-minmax-neon-1x4-acc2.c | 154 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local 156 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 158 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 161 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
|
D | 3x3s2p1-minmax-neon-1x4.c | 152 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local 154 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 156 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 159 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
|
D | 3x3p1-minmax-neonfma-1x4-acc4.c | 163 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4() local 165 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4() 167 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4() 170 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4()
|
D | 3x3s2p1-minmax-neon-1x4-acc3.c | 156 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local 158 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 160 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 163 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc3.c | 156 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local 158 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 160 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 163 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
|
D | 3x3s2p1-minmax-neon-1x4-acc4.c | 158 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local 160 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 162 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 165 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
|
D | 3x3p1-minmax-neon-2x4-acc2.c | 203 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() local 207 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() 209 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() 214 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2()
|
D | 3x3p1-minmax-neonfma-2x4-acc2.c | 203 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() local 207 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() 209 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() 214 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2()
|
D | 3x3p1-minmax-neon-2x4.c | 199 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() local 203 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() 205 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() 210 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4()
|
D | 3x3p1-minmax-neonfma-2x4.c | 199 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() local 203 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() 205 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() 210 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4()
|
D | 3x3s2p1-minmax-neonfma-2x4-acc2.c | 209 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local 213 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 215 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 220 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
|
D | 3x3s2p1-minmax-neon-2x4.c | 205 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local 209 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 211 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 216 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
|
D | 3x3s2p1-minmax-neonfma-2x4.c | 205 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local 209 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 211 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 216 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
|
D | 3x3p1-minmax-neonfma-3x4.c | 241 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() local 247 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() 249 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() 256 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
|
D | 3x3p1-minmax-neon-3x4.c | 241 float32x2_t vo0_lo = vget_low_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() local 247 vst1_f32(o0, vo0_lo); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() 249 vo0_lo = vget_high_f32(vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() 256 vst1_lane_f32(o0, vo0_lo, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
|