/external/XNNPACK/src/f32-dwconv2d-chw/gen/  (declarations and uses of vi3x7BDF):

3x3s2p1-minmax-neonfma-2x4-acc2.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2())
    104: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x8ACE9BDF.val[1], 3);  [local]
    113: vo1p0 = vfmaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);
    174: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x9BDF, 3);  [local]
    181: vo1p0 = vfmaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);

3x3s2p1-minmax-neon-2x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4())
    104: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x8ACE9BDF.val[1], 3);  [local]
    113: vo1p0 = vmlaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);
    172: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x9BDF, 3);  [local]
    179: vo1p0 = vmlaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);

3x3s2p1-minmax-neonfma-2x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4())
    104: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x8ACE9BDF.val[1], 3);  [local]
    113: vo1p0 = vfmaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);
    172: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x9BDF, 3);  [local]
    179: vo1p0 = vfmaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);

3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4())
    138: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    147: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));
    210: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    217: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));

3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2())
    138: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    147: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));
    212: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    219: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));

3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2())
    138: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    147: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));
    212: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    219: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));

3x3s2p1-minmax-wasmsimd-arm-splat-2x4-acc2.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2())
    128: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    137: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…
    202: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    209: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…

3x3s2p1-minmax-wasmsimd-arm-splat-2x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4())
    128: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    137: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…
    200: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    207: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…

3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4())
    138: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    147: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));
    210: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    217: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));

3x3s2p1-minmax-sse-2x4-acc2.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2())
    137: const __m128 vi3x7BDF = _mm_move_ss(vi3xF9BD, vi3x7531);  [local]
    149: vo1p1 = _mm_add_ps(vo1p1, _mm_mul_ps(vi3x7BDF, vk10));
    215: const __m128 vi3x7BDF = _mm_move_ss(vi3xF9BD, vi3x7531);  [local]
    227: vo1p1 = _mm_add_ps(vo1p1, _mm_mul_ps(vi3x7BDF, vk10));

3x3s2p1-minmax-sse-2x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4())
    137: const __m128 vi3x7BDF = _mm_move_ss(vi3xF9BD, vi3x7531);  [local]
    149: vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x7BDF, vk10));
    213: const __m128 vi3x7BDF = _mm_move_ss(vi3xF9BD, vi3x7531);  [local]
    225: vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x7BDF, vk10));

3x3s2p1-minmax-wasmsimd-x86-splat-2x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4())
    128: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    137: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…
    200: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    207: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…

3x3s2p1-minmax-wasmsimd-x86-splat-2x4-acc2.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2())
    128: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    137: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…
    202: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    209: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…

3x3s2p1-minmax-neonfma-3x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4())
    122: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x8ACE9BDF.val[1], 3);  [local]
    136: vo1p0 = vfmaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);
    213: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x9BDF, 3);  [local]
    223: vo1p0 = vfmaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);

3x3s2p1-minmax-neon-3x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4())
    122: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x8ACE9BDF.val[1], 3);  [local]
    136: vo1p0 = vmlaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);
    213: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x9BDF, 3);  [local]
    223: vo1p0 = vmlaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);

3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4())
    164: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    178: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));
    261: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    271: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));

3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4())
    164: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    178: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));
    261: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    271: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));

3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4())
    154: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    168: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…
    251: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    261: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…

3x3s2p1-minmax-wasmsimd-x86-splat-3x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4())
    154: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    168: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…
    251: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    261: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…

3x3s2p1-minmax-neonfma-4x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4())
    140: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x8ACE9BDF.val[1], 3);  [local]
    159: vo1p0 = vfmaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);
    254: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x9BDF, 3);  [local]
    267: vo1p0 = vfmaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);

3x3s2p1-minmax-neon-4x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4())
    140: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x8ACE9BDF.val[1], 3);  [local]
    159: vo1p0 = vmlaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);
    254: const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x9BDF, 3);  [local]
    267: vo1p0 = vmlaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);

3x3s2p1-minmax-sse-3x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4())
    167: const __m128 vi3x7BDF = _mm_move_ss(vi3xF9BD, vi3x7531);  [local]
    184: vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x7BDF, vk10));
    270: const __m128 vi3x7BDF = _mm_move_ss(vi3xF9BD, vi3x7531);  [local]
    287: vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x7BDF, vk10));

3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4())
    190: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    209: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));
    312: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    325: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));

3x3s2p1-minmax-wasmsimd-x86-loadsplat-4x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4())
    190: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    209: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));
    312: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    325: vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));

3x3s2p1-minmax-wasmsimd-arm-splat-4x4.c  (all hits in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4())
    180: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    199: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…
    302: const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);  [local]
    315: …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0,…

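Every NEON and NEONFMA entry above builds vi3x7BDF the same way: vextq_f32(a, b, 3) concatenates the two registers and takes four consecutive lanes starting at index 3, and the result is then folded into the accumulator against one weight lane with vfmaq_lane_f32 (vmlaq_lane_f32 in the non-FMA kernels, a separate multiply and add of the same values). The following is a minimal standalone sketch of that lane arithmetic, not XNNPACK code; the lane values and the compile command are illustrative assumptions, and it presumes an AArch64 toolchain where vfmaq_lane_f32 is available.

// Sketch only, not XNNPACK code. Build on AArch64, e.g.: gcc -O2 neon_sketch.c
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  // Made-up stand-in values; lane i is just a recognizable number.
  const float a1357[4] = {1.0f, 3.0f, 5.0f, 7.0f};
  const float a9BDF[4] = {9.0f, 11.0f, 13.0f, 15.0f};
  const float w4567[4] = {4.0f, 5.0f, 6.0f, 7.0f};
  const float32x4_t vi3x1357 = vld1q_f32(a1357);
  const float32x4_t vi3x9BDF = vld1q_f32(a9BDF);
  const float32x4_t vw4567 = vld1q_f32(w4567);

  // vextq_f32(a, b, 3) yields { a[3], b[0], b[1], b[2] } -- the lane
  // extraction used for vi3x7BDF in the kernels listed above.
  const float32x4_t vi3x7BDF = vextq_f32(vi3x1357, vi3x9BDF, 3);

  // vfmaq_lane_f32 multiplies every lane of vi3x7BDF by lane 0 of the low
  // half of vw4567 (one kernel tap) and adds the products to the accumulator.
  float32x4_t vo1p0 = vdupq_n_f32(0.0f);
  vo1p0 = vfmaq_lane_f32(vo1p0, vi3x7BDF, vget_low_f32(vw4567), 0);

  float out[4];
  vst1q_f32(out, vo1p0);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // prints: 28 36 44 52
  return 0;
}
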
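The wasmsimd entries express the same extraction with wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6), which indexes the eight lanes of the concatenated operands. The loadsplat kernels multiply by a pre-broadcast weight vector vk10, while the splat kernels broadcast a lane of vw4567 inline (the truncated wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0, … calls above). A minimal sketch of both steps, again with made-up values, assuming a WebAssembly SIMD toolchain (e.g. clang or emcc with -msimd128); the in-line broadcast of lane 0 is my reading of the truncated snippet, not a quote of it.

// Sketch only, not XNNPACK code. Build e.g.: emcc -msimd128 wasm_sketch.c
#include <wasm_simd128.h>
#include <stdio.h>

int main(void) {
  // Made-up stand-in values for the odd input columns and the weights.
  const v128_t vi3x1357 = wasm_f32x4_make(1.0f, 3.0f, 5.0f, 7.0f);
  const v128_t vi3x9BDF = wasm_f32x4_make(9.0f, 11.0f, 13.0f, 15.0f);
  const v128_t vw4567 = wasm_f32x4_make(4.0f, 5.0f, 6.0f, 7.0f);

  // Lane indices 3,4,5,6 address the 8-lane concatenation, so the result is
  // { vi3x1357[3], vi3x9BDF[0], vi3x9BDF[1], vi3x9BDF[2] }, matching
  // vextq_f32(..., 3) in the NEON kernels.
  const v128_t vi3x7BDF = wasm_v32x4_shuffle(vi3x1357, vi3x9BDF, 3, 4, 5, 6);

  // Broadcast one weight lane; the loadsplat kernels keep such a vector
  // (vk10) loaded outside the loop instead of rebuilding it here.
  const v128_t vk10 = wasm_v32x4_shuffle(vw4567, vw4567, 0, 0, 0, 0);

  // Multiply-add of this tap into the row-1 accumulator, as in the listing.
  v128_t vo1p0 = wasm_f32x4_splat(0.0f);
  vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x7BDF, vk10));

  float out[4];
  wasm_v128_store(out, vo1p0);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // prints: 28 36 44 52
  return 0;
}
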
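The SSE kernels reach the same vector with _mm_move_ss(vi3xF9BD, vi3x7531), which copies lane 0 of the second operand into lane 0 of the first and keeps the remaining lanes; the acc2 variants add this tap into a second accumulator vo1p1 instead of vo1p0. A minimal sketch with made-up lane values, assuming an x86 target with SSE:

// Sketch only, not XNNPACK code. Build on x86, e.g.: gcc -O2 -msse sse_sketch.c
#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
  // Made-up stand-in values. _mm_set_ps lists lanes high-to-low, so in lane
  // order 0..3 these hold {15, 9, 11, 13} and {7, 5, 3, 1} respectively.
  const __m128 vi3xF9BD = _mm_set_ps(13.0f, 11.0f, 9.0f, 15.0f);
  const __m128 vi3x7531 = _mm_set_ps(1.0f, 3.0f, 5.0f, 7.0f);
  const __m128 vk10 = _mm_set1_ps(4.0f);  // one broadcast weight tap

  // _mm_move_ss(a, b) = { b[0], a[1], a[2], a[3] }, i.e. {7, 9, 11, 13} here,
  // SSE's counterpart to the vextq_f32/wasm_v32x4_shuffle extraction above.
  const __m128 vi3x7BDF = _mm_move_ss(vi3xF9BD, vi3x7531);

  // Multiply-add of the tap into an accumulator (the acc2 kernels use vo1p1).
  __m128 vo1p0 = _mm_setzero_ps();
  vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x7BDF, vk10));

  float out[4];
  _mm_storeu_ps(out, vo1p0);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // prints: 28 36 44 52
  return 0;
}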