Home
last modified time | relevance | path

Searched refs:vi4x68AC (Results 1 – 25 of 70) sorted by relevance

123

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5s2p2-minmax-neon-1x4-acc4.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() local
133 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
235 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() local
245 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
D5x5s2p2-minmax-neon-1x4-acc3.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() local
133 vo0p1 = vmlaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
234 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() local
244 vo0p1 = vmlaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
D5x5s2p2-minmax-neonfma-1x4-acc4.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() local
133 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
235 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() local
245 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
D5x5s2p2-minmax-neonfma-1x4-acc2.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() local
133 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
233 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() local
243 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
D5x5s2p2-minmax-neonfma-1x4-acc3.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() local
133 vo0p1 = vfmaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
234 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() local
244 vo0p1 = vfmaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
D5x5s2p2-minmax-neon-1x4.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() local
133 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
232 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() local
242 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
D5x5s2p2-minmax-neonfma-1x4.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() local
133 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
232 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() local
242 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
D5x5s2p2-minmax-neon-1x4-acc2.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() local
133 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
233 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() local
243 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
D5x5s2p2-minmax-neonfma-3x4-acc2.c177 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local
190 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x68AC, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
197 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
204 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
384 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local
392 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x68AC, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
399 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
406 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
D5x5s2p2-minmax-neon-3x4.c177 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local
190 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x68AC, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
197 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
204 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
381 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local
389 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x68AC, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
396 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
403 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
D5x5s2p2-minmax-neonfma-3x4.c177 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local
190 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x68AC, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
197 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
204 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
381 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local
389 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x68AC, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
396 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
403 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
D5x5s2p2-minmax-neon-3x4-acc2.c177 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local
190 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x68AC, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
197 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
204 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
384 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local
392 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x68AC, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
399 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
406 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
D5x5s2p2-minmax-neon-2x4-acc2.c150 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local
164 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
169 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
309 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local
320 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
325 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
D5x5s2p2-minmax-neonfma-2x4-acc2.c150 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local
164 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
169 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
309 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local
320 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
325 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
D5x5s2p2-minmax-neon-2x4.c150 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() local
164 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
169 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
307 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() local
318 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
323 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
D5x5s2p2-minmax-neon-2x4-acc3.c150 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local
164 vo1p2 = vmlaq_lane_f32(vo1p2, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
169 vo0p1 = vmlaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
311 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local
322 vo1p2 = vmlaq_lane_f32(vo1p2, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
327 vo0p1 = vmlaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
D5x5s2p2-minmax-neonfma-2x4-acc3.c150 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local
164 vo1p2 = vfmaq_lane_f32(vo1p2, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
169 vo0p1 = vfmaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
311 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local
322 vo1p2 = vfmaq_lane_f32(vo1p2, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
327 vo0p1 = vfmaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
D5x5s2p2-minmax-neonfma-2x4.c150 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() local
164 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
169 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
307 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() local
318 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x68AC, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
323 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
D5x5s2p2-minmax-neon-1x4-acc5.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() local
133 vo0p1 = vmlaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
236 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() local
246 vo0p1 = vmlaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
D5x5s2p2-minmax-neonfma-1x4-acc5.c122 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() local
133 vo0p1 = vfmaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
236 const float32x4_t vi4x68AC = vextq_f32(vi4x0246, vi4x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() local
246 vo0p1 = vfmaq_lane_f32(vo0p1, vi4x68AC, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
D5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4-acc2.c237 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() local
250 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
257 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
264 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
489 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() local
497 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
504 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
511 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
D5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4.c237 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() local
250 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
257 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
264 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
486 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() local
494 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
501 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
508 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
D5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c237 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() local
250 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
257 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
264 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
489 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() local
497 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
504 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
511 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
D5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4.c237 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() local
250 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
257 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
264 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
486 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() local
494 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x68AC, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
501 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
508 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
D5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4.c202 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4() local
216 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4()
221 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4()
394 const v128_t vi4x68AC = wasm_v32x4_shuffle(vi4x0246, vi4x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4() local
405 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x68AC, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4()
410 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x68AC, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4()

123