/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-neon-2x4-acc2.c | 204 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() local 206 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() 210 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() 213 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2()
|
D | 3x3p1-minmax-neonfma-2x4-acc2.c | 204 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() local 206 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() 210 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() 213 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2()
|
D | 3x3p1-minmax-neon-2x4.c | 200 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() local 202 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() 206 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() 209 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4()
|
D | 3x3p1-minmax-neonfma-2x4.c | 200 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() local 202 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() 206 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() 209 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4()
|
D | 3x3s2p1-minmax-neonfma-2x4-acc2.c | 210 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local 212 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 216 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 219 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
|
D | 3x3s2p1-minmax-neon-2x4.c | 206 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local 208 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 212 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 215 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
|
D | 3x3s2p1-minmax-neonfma-2x4.c | 206 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local 208 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 212 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 215 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
|
D | 3x3p1-minmax-neonfma-3x4.c | 242 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() local 246 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() 250 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() 255 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
|
D | 3x3p1-minmax-neon-3x4.c | 242 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() local 246 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() 250 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() 255 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
|
D | 3x3s2p1-minmax-neonfma-3x4.c | 258 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local 262 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 266 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 271 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
|
D | 3x3s2p1-minmax-neon-3x4.c | 258 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local 262 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 266 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 271 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
|
D | 3x3p1-minmax-neonfma-4x4.c | 284 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4() local 290 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4() 294 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4() 301 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4()
|
D | 3x3p1-minmax-neon-4x4.c | 284 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4() local 290 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4() 294 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4() 301 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4()
|
D | 3x3p1-minmax-neon-5x4.c | 326 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4() local 334 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4() 338 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4() 347 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4()
|
D | 3x3s2p1-minmax-neonfma-4x4.c | 310 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() local 316 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 320 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 327 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
|
D | 3x3s2p1-minmax-neon-4x4.c | 310 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() local 316 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 320 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 327 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
|
D | 3x3p1-minmax-neonfma-5x4.c | 326 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4() local 334 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4() 338 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4() 347 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4()
|
D | 5x5s2p2-minmax-neon-2x4-acc2.c | 390 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local 392 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 396 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 399 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
|
D | 5x5s2p2-minmax-neonfma-2x4-acc2.c | 390 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local 392 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 396 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 399 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
|
D | 5x5s2p2-minmax-neon-2x4.c | 386 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() local 388 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 392 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 395 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
|
D | 5x5s2p2-minmax-neon-2x4-acc3.c | 394 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local 396 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 400 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 403 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
|
D | 3x3p1-minmax-neon-6x4.c | 368 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4() local 378 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4() 382 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4() 393 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4()
|
D | 5x5s2p2-minmax-neonfma-2x4-acc3.c | 394 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local 396 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 400 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 403 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
|
D | 5x5s2p2-minmax-neonfma-2x4.c | 386 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() local 388 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 392 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 395 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
|
D | 3x3p1-minmax-neonfma-6x4.c | 368 float32x2_t vo1_lo = vget_low_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4() local 378 vst1_f32(o1, vo1_lo); o1 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4() 382 vo1_lo = vget_high_f32(vo1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4() 393 vst1_lane_f32(o1, vo1_lo, 0); o1 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4()
|