/external/XNNPACK/src/f32-dwconv2d-chw/gen/: cross-references for the local vector vi2x6789 (one entry per generated kernel; the number before each snippet is the line within that file)
D | 5x5p2-minmax-neonfma-1x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2():
    153   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    164   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    261   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    272   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    360   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    368   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
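Note: the NEON entries in this listing all share one pattern: a register rotate (vextq_f32) builds the input window shifted two columns to the right, and a lane-broadcast multiply-accumulate applies the matching 5x5 tap. The sketch below is only an illustration of that pattern, not the XNNPACK source; the helper name and the assumption that vwCDEF holds packed weights w[12..15] (so its top lane is the row-2, column-4 tap) are mine, inferred from the lane indices shown above.

```c
#include <arm_neon.h>

// Minimal sketch of the shift-and-accumulate step used by the 1x4 NEON kernels
// listed above (hypothetical helper; names follow the listing).
static inline float32x4_t accumulate_row2_tap4(
    float32x4_t vo0p0,     // running accumulator for 4 output pixels
    float32x4_t vi2x4567,  // input row 2, columns 4..7
    float32x4_t vi2x89AB,  // input row 2, columns 8..11
    float32x4_t vwCDEF)    // assumed packed weights w[12..15]
{
  // Columns 6..9 of row 2: rotate the 4..7 window right by two elements,
  // pulling the low two lanes of the 8..11 window in from the top.
  const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);

  // Multiply every lane by the single tap in vwCDEF[3] and accumulate.
  // The plain-NEON variants use vmlaq_lane_f32 here instead of vfmaq_lane_f32.
  vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
  return vo0p0;
}
```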
|
D | 5x5p2-minmax-neonfma-1x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3():
    153   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    164   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    262   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    273   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    362   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    370   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-neon-1x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4():
    153   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    164   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    260   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    271   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    358   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    366   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-neonfma-1x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4():
    153   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    164   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    260   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    271   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    358   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    366   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-neon-1x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3():
    153   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    164   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    262   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    273   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    362   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    370   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-neon-1x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2():
    153   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    164   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    261   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    272   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    360   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    368   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
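Note: the last occurrence group in each entry (e.g. lines 358-370 of the 1x4 variants above) is the right-edge remainder: past the last full input chunk there is no vi2x89AB to pull from, so the shifted window is built from the already-shifted vi2x5678 vector and a zero vector. A hedged sketch of that edge step, with the same assumed weight layout as the previous example:

```c
#include <arm_neon.h>

// Minimal sketch of the right-edge step shown in the listings (hypothetical helper).
static inline float32x4_t accumulate_row2_tap4_edge(
    float32x4_t vo0p0,     // running accumulator
    float32x4_t vi2x5678,  // columns 5..8, already rotated by one element
    float32x4_t vwCDEF)    // assumed packed weights w[12..15]
{
  const float32x4_t vzero = vmovq_n_f32(0.0f);

  // Columns 6..9, where lanes past the valid input read as zero padding.
  const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);

  // Same tap as in the main loop (vmlaq_lane_f32 in the non-FMA kernels).
  return vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
}
```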
|
D | 5x5p2-minmax-neon-2x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2():
    187   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    200   vo1p1 = vmlaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    202   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    333   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    346   vo1p1 = vmlaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    348   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    468   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    477   vo1p1 = vmlaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    479   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-neonfma-2x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4():
    187   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    200   vo1p0 = vfmaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    202   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    331   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    344   vo1p0 = vfmaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    346   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    464   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    473   vo1p0 = vfmaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    475   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-neon-2x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4():
    187   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    200   vo1p0 = vmlaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    202   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    331   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    344   vo1p0 = vmlaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    346   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    464   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    473   vo1p0 = vmlaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    475   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-neonfma-2x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2():
    187   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    200   vo1p1 = vfmaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    202   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    333   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    346   vo1p1 = vfmaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    348   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    468   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    477   vo1p1 = vfmaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    479   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-neonfma-3x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4():
    221   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    234   vo2p0 = vfmaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    237   vo1p0 = vfmaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    240   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    402   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    415   vo2p0 = vfmaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    418   vo1p0 = vfmaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    421   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    570   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    578   vo2p0 = vfmaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    [all …]
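Note: in the 2x4 and 3x4 variants the same shifted vector is reused for two or three output rows at once. Because the 5x5 window slides down by one input row per output row, input row 2 lines up with kernel row 2 for output row 0, kernel row 1 for output row 1, and kernel row 0 for output row 2, which is why the listing shows vi2x6789 multiplied by lanes of vwCDEF, vw89AB, and vw4567 in the same iteration (the WASM SIMD entries below name these taps vk24, vk14, and vk04 directly). A hedged sketch of the three-row reuse, under the same assumed weight packing as the earlier examples:

```c
#include <arm_neon.h>

// Sketch of the 3x4 reuse shown above: one shifted vector from input row 2
// feeds three output-row accumulators, each with its own 5x5 tap.
// The struct and helper are hypothetical; weight-vector names follow the listing.
typedef struct {
  float32x4_t vo0p0;  // output row 0 accumulator
  float32x4_t vo1p0;  // output row 1 accumulator
  float32x4_t vo2p0;  // output row 2 accumulator
} acc3_t;

static inline acc3_t accumulate_3rows_from_row2(
    acc3_t acc,
    float32x4_t vi2x4567, float32x4_t vi2x89AB,
    float32x4_t vw4567, float32x4_t vw89AB, float32x4_t vwCDEF)
{
  // Columns 6..9 of input row 2.
  const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);

  // Same input columns, three different taps (k04, k14, k24 in the
  // naming used by the WASM SIMD variants listed below).
  acc.vo2p0 = vfmaq_lane_f32(acc.vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
  acc.vo1p0 = vfmaq_lane_f32(acc.vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
  acc.vo0p0 = vfmaq_lane_f32(acc.vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
  return acc;
}
```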
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-3x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4():
    250   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    263   vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04));
    266   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    269   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    430   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    443   vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04));
    446   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    449   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    596   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x5678, vzero, 1, 2, 3, 4);   (local)
    604   vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04));
    [all …]
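Note: the wasmsimd loadsplat variants express the same two steps without lane-broadcast FMA: wasm_v32x4_shuffle concatenates the two input chunks and picks elements 2..5 to form the shifted window, and each tap is a pre-splatted vector (vk04, vk14, vk24) applied with a separate multiply and add. This step is identical in the arm- and x86-suffixed files listed here; they differ elsewhere in the kernels. A hedged sketch of the single-row form, with a hypothetical helper name:

```c
#include <wasm_simd128.h>

// Sketch of the shuffle-and-accumulate step the wasmsimd loadsplat kernels
// above use in place of NEON's vext + lane FMA. vk24 is the k(2,4) tap
// already splatted across all four lanes ("loadsplat").
static inline v128_t accumulate_row2_tap4_wasm(
    v128_t vo0p0,     // running accumulator for 4 output pixels
    v128_t vi2x4567,  // input row 2, columns 4..7
    v128_t vi2x89AB,  // input row 2, columns 8..11
    v128_t vk24)      // splatted tap k(2,4)
{
  // Elements 2..5 of the 8-lane concatenation {vi2x4567, vi2x89AB},
  // i.e. columns 6..9 of the input row.
  const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);

  // Separate multiply and add, since WASM SIMD has no fused lane multiply-add.
  return wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
}
```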
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2():
    250   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    263   vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04));
    266   vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x6789, vk14));
    269   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    433   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    446   vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04));
    449   vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x6789, vk14));
    452   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    602   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x5678, vzero, 1, 2, 3, 4);   (local)
    610   vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04));
    [all …]
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-3x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4():
    250   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    263   vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04));
    266   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    269   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    430   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    443   vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04));
    446   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    449   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    596   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x5678, vzero, 1, 2, 3, 4);   (local)
    604   vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi2x6789, vk04));
    [all …]
|
D | 5x5p2-minmax-neon-3x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2():
    221   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    234   vo2p0 = vmlaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    237   vo1p1 = vmlaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    240   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    405   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    418   vo2p0 = vmlaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    421   vo1p1 = vmlaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    424   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    576   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    584   vo2p0 = vmlaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    [all …]
|
D | 5x5p2-minmax-neon-3x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4():
    221   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    234   vo2p0 = vmlaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    237   vo1p0 = vmlaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    240   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    402   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    415   vo2p0 = vmlaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    418   vo1p0 = vmlaq_lane_f32(vo1p0, vi2x6789, vget_high_f32(vw89AB), 0);
    421   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    570   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    578   vo2p0 = vmlaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    [all …]
|
D | 5x5p2-minmax-neonfma-3x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2():
    221   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    234   vo2p0 = vfmaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    237   vo1p1 = vfmaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    240   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    405   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    418   vo2p0 = vfmaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    421   vo1p1 = vfmaq_lane_f32(vo1p1, vi2x6789, vget_high_f32(vw89AB), 0);
    424   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    576   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    584   vo2p0 = vfmaq_lane_f32(vo2p0, vi2x6789, vget_low_f32(vw4567), 1);
    [all …]
|
D | 5x5p2-minmax-neonfma-2x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3():
    187   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    200   vo1p2 = vfmaq_lane_f32(vo1p2, vi2x6789, vget_high_f32(vw89AB), 0);
    202   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    335   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    348   vo1p2 = vfmaq_lane_f32(vo1p2, vi2x6789, vget_high_f32(vw89AB), 0);
    350   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    472   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    481   vo1p2 = vfmaq_lane_f32(vo1p2, vi2x6789, vget_high_f32(vw89AB), 0);
    483   vo0p0 = vfmaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-2x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4():
    216   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    229   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    231   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    359   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    372   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    374   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    490   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x5678, vzero, 1, 2, 3, 4);   (local)
    499   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    501   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-2x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3():
    216   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    229   vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi2x6789, vk14));
    231   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    363   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    376   vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi2x6789, vk14));
    378   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    498   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x5678, vzero, 1, 2, 3, 4);   (local)
    507   vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi2x6789, vk14));
    509   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2():
    216   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    229   vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x6789, vk14));
    231   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    361   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    374   vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x6789, vk14));
    376   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    494   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x5678, vzero, 1, 2, 3, 4);   (local)
    503   vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x6789, vk14));
    505   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
|
D | 5x5p2-minmax-neon-2x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3():
    187   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    200   vo1p2 = vmlaq_lane_f32(vo1p2, vi2x6789, vget_high_f32(vw89AB), 0);
    202   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    335   const float32x4_t vi2x6789 = vextq_f32(vi2x4567, vi2x89AB, 2);   (local)
    348   vo1p2 = vmlaq_lane_f32(vo1p2, vi2x6789, vget_high_f32(vw89AB), 0);
    350   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
    472   const float32x4_t vi2x6789 = vextq_f32(vi2x5678, vzero, 1);   (local)
    481   vo1p2 = vmlaq_lane_f32(vo1p2, vi2x6789, vget_high_f32(vw89AB), 0);
    483   vo0p0 = vmlaq_lane_f32(vo0p0, vi2x6789, vget_high_f32(vwCDEF), 1);
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-2x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4():
    216   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    229   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    231   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    359   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    372   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    374   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    490   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x5678, vzero, 1, 2, 3, 4);   (local)
    499   vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x6789, vk14));
    501   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2():
    216   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    229   vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x6789, vk14));
    231   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    361   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    374   vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x6789, vk14));
    376   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    494   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x5678, vzero, 1, 2, 3, 4);   (local)
    503   vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x6789, vk14));
    505   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-1x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_1x4():
    182   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    193   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    288   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x4567, vi2x89AB, 2, 3, 4, 5);   (local)
    299   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
    384   const v128_t vi2x6789 = wasm_v32x4_shuffle(vi2x5678, vzero, 1, 2, 3, 4);   (local)
    392   vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x6789, vk24));
|