/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3s2p1-minmax-neonfma-2x4-acc2.c | 102 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local 110 vo1p1 = vfmaq_lane_f32(vo1p1, vi2x7BDF, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 115 vo0p1 = vfmaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 173 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local 178 vo1p1 = vfmaq_lane_f32(vo1p1, vi2x7BDF, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 183 vo0p1 = vfmaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
|
D | 3x3s2p1-minmax-neon-2x4.c | 102 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local 110 vo1p0 = vmlaq_lane_f32(vo1p0, vi2x7BDF, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 115 vo0p0 = vmlaq_lane_f32(vo0p0, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 171 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local 176 vo1p0 = vmlaq_lane_f32(vo1p0, vi2x7BDF, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 181 vo0p0 = vmlaq_lane_f32(vo0p0, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
|
D | 3x3s2p1-minmax-neonfma-2x4.c | 102 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local 110 vo1p0 = vfmaq_lane_f32(vo1p0, vi2x7BDF, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 115 vo0p0 = vfmaq_lane_f32(vo0p0, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 171 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local 176 vo1p0 = vfmaq_lane_f32(vo1p0, vi2x7BDF, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 181 vo0p0 = vfmaq_lane_f32(vo0p0, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
|
D | 3x3s2p1-minmax-neonfma-1x4.c | 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local 90 vo0p0 = vfmaq_lane_f32(vo0p0, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 129 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local 135 vo0p0 = vfmaq_lane_f32(vo0p0, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc2.c | 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local 90 vo0p1 = vfmaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 130 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local 136 vo0p1 = vfmaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc4.c | 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local 90 vo0p3 = vfmaq_lane_f32(vo0p3, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 132 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local 138 vo0p3 = vfmaq_lane_f32(vo0p3, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
|
D | 3x3s2p1-minmax-neon-1x4-acc2.c | 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local 90 vo0p1 = vmlaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 130 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local 136 vo0p1 = vmlaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
|
D | 3x3s2p1-minmax-neon-1x4.c | 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local 90 vo0p0 = vmlaq_lane_f32(vo0p0, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 129 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local 135 vo0p0 = vmlaq_lane_f32(vo0p0, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
|
D | 3x3s2p1-minmax-neon-1x4-acc3.c | 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local 90 vo0p1 = vmlaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 131 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local 137 vo0p1 = vmlaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc3.c | 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local 90 vo0p1 = vfmaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 131 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local 137 vo0p1 = vfmaq_lane_f32(vo0p1, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
|
D | 3x3s2p1-minmax-neon-1x4-acc4.c | 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local 90 vo0p3 = vmlaq_lane_f32(vo0p3, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 132 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local 138 vo0p3 = vmlaq_lane_f32(vo0p3, vi2x7BDF, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4.c | 136 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() local 144 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x7BDF, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() 149 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() 209 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() local 214 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x7BDF, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() 219 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c | 136 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2() local 144 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x7BDF, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2() 149 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2() 211 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2() local 216 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x7BDF, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2() 221 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c | 136 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2() local 144 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x7BDF, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2() 149 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2() 211 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2() local 216 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x7BDF, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2() 221 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-2x4-acc2.c | 126 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2() local 134 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2() 139 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2() 201 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2() local 206 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2() 211 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-2x4.c | 126 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4() local 134 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4() 139 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4() 199 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4() local 204 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4() 209 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4.c | 136 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4() local 144 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x7BDF, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4() 149 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4() 209 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4() local 214 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x7BDF, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4() 219 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4.c | 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4() local 116 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4() 157 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4() local 163 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c | 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2() local 116 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2() 158 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2() local 164 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4.c | 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() local 106 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 147 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() local 153 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4.c | 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4() local 106 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4() 147 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4() local 153 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc3.c | 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3() local 106 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3() 149 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3() local 155 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc2.c | 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2() local 106 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2() 148 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2() local 154 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc2.c | 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() local 106 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 148 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() local 154 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc4.c | 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4() local 106 …vo0p3 = wasm_f32x4_add(vo0p3, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4() 150 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4() local 156 …vo0p3 = wasm_f32x4_add(vo0p3, wasm_f32x4_mul(vi2x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4()
|