Home
last modified time | relevance | path

Searched refs:vi3x6789 (Results 1 – 25 of 91) sorted by relevance

1234

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-neon-4x4.c257 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
271 vo3p0 = vmlaq_lane_f32(vo3p0, vi3x6789, vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
275 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
279 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
283 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
475 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
489 vo3p0 = vmlaq_lane_f32(vo3p0, vi3x6789, vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
493 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
497 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
501 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
[all …]
D5x5p2-minmax-neon-4x4-acc2.c257 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
271 vo3p0 = vmlaq_lane_f32(vo3p0, vi3x6789, vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
275 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
279 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
283 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
479 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
493 vo3p0 = vmlaq_lane_f32(vo3p0, vi3x6789, vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
497 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
501 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
505 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
[all …]
D5x5p2-minmax-neonfma-4x4.c257 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
271 vo3p0 = vfmaq_lane_f32(vo3p0, vi3x6789, vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
275 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
279 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
283 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
475 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
489 vo3p0 = vfmaq_lane_f32(vo3p0, vi3x6789, vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
493 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
497 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
501 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
[all …]
D5x5p2-minmax-neonfma-4x4-acc2.c257 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
271 vo3p0 = vfmaq_lane_f32(vo3p0, vi3x6789, vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
275 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
279 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
283 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
479 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
493 vo3p0 = vfmaq_lane_f32(vo3p0, vi3x6789, vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
497 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
501 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
505 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
[all …]
D5x5p2-minmax-neonfma-1x4-acc2.c155 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2() local
166 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2()
263 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2() local
274 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2()
361 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2() local
370 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2()
D5x5p2-minmax-neonfma-1x4-acc3.c155 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() local
166 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3()
264 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() local
275 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3()
363 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() local
372 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3()
D5x5p2-minmax-neon-1x4.c155 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4() local
166 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4()
262 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4() local
273 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4()
359 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4() local
368 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4()
D5x5p2-minmax-neonfma-1x4.c155 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4() local
166 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4()
262 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4() local
273 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4()
359 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4() local
368 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4()
D5x5p2-minmax-neon-1x4-acc3.c155 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() local
166 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3()
264 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() local
275 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3()
363 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() local
372 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3()
D5x5p2-minmax-neon-1x4-acc2.c155 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2() local
166 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2()
263 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2() local
274 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2()
361 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2() local
370 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2()
D5x5p2-minmax-neon-2x4-acc2.c189 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
203 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
205 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
335 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
349 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
351 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
469 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
480 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
482 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
D5x5p2-minmax-neonfma-2x4.c189 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
203 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
205 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
333 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
347 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
349 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
465 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
476 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
478 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
D5x5p2-minmax-neon-2x4.c189 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
203 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
205 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
333 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
347 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
349 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
465 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
476 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
478 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
D5x5p2-minmax-neonfma-2x4-acc2.c189 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
203 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
205 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
335 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
349 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
351 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
469 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
480 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
482 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
D5x5p2-minmax-neonfma-3x4.c223 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
238 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
241 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
244 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
404 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
419 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
422 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
425 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
571 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
582 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-3x4.c252 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
267 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
270 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
273 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
432 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
447 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
450 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
453 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
597 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
608 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c252 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
267 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
270 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
273 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
435 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
450 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
453 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
456 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
603 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
614 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-3x4.c252 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
267 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
270 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
273 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
432 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
447 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
450 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
453 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
597 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
608 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
[all …]
D5x5p2-minmax-neon-3x4-acc2.c223 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
238 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
241 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
244 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
407 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
422 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
425 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
428 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
577 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
588 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
[all …]
D5x5p2-minmax-neon-3x4.c223 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
238 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
241 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
244 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
404 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
419 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
422 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
425 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
571 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
582 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
[all …]
D5x5p2-minmax-neonfma-3x4-acc2.c223 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
238 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
241 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
244 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
407 const float32x4_t vi3x6789 = vextq_f32(vi3x4567, vi3x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
422 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
425 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x6789, vget_high_f32(vwCDEF), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
428 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
577 const float32x4_t vi3x6789 = vextq_f32(vi3x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
588 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x6789, vget_high_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c286 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
300 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
304 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
308 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
312 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
507 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
521 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
525 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
529 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
533 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4.c286 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
300 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
304 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
308 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
312 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
503 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
517 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
521 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
525 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
529 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c286 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
300 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
304 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
308 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
312 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
507 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
521 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
525 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
529 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
533 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4.c286 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
300 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
304 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
308 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
312 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
503 const v128_t vi3x6789 = wasm_v32x4_shuffle(vi3x4567, vi3x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
517 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x6789, vk04)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
521 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x6789, vk14)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
525 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x6789, vk24)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
529 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
[all …]

1234