Home
last modified time | relevance | path

Searched refs:vi5x89AB (Results 1 – 25 of 117) sorted by relevance

12345

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-neon-2x4-acc2.c94 const float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
166 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
193 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
194 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
233 float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
240 vi5x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi5x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
312 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
339 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
340 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
D5x5p2-minmax-neonfma-2x4.c94 const float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
166 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
193 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
194 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
231 float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
238 vi5x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi5x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
310 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
337 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
338 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
D5x5p2-minmax-neon-2x4.c94 const float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
166 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
193 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
194 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
231 float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
238 vi5x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi5x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
310 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
337 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
338 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
D5x5p2-minmax-neonfma-2x4-acc2.c94 const float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
166 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
193 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
194 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
233 float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
240 vi5x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi5x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
312 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
339 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
340 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
D5x5p2-minmax-neonfma-2x4-acc3.c94 const float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3() local
166 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3()
193 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3()
194 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3()
235 float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3() local
242 vi5x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi5x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3()
314 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3()
341 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3()
342 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3()
D5x5p2-minmax-wasmsimd-x86-loadsplat-2x4.c123 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4() local
195 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
222 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
223 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
259 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4() local
266 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
338 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
365 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
366 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4()
D5x5p2-minmax-wasmsimd-arm-loadsplat-2x4-acc3.c123 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3() local
195 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
222 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
223 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
263 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3() local
270 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
342 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
369 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
370 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3()
D5x5p2-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c123 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2() local
195 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
222 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
223 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
261 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2() local
268 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
340 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
367 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
368 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc2()
D5x5p2-minmax-neon-2x4-acc3.c94 const float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3() local
166 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3()
193 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3()
194 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3()
235 float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3() local
242 vi5x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi5x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3()
314 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3()
341 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3()
342 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3()
D5x5p2-minmax-wasmsimd-arm-loadsplat-2x4.c123 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4() local
195 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
222 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
223 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
259 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4() local
266 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
338 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
365 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
366 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4()
D5x5p2-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c123 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2() local
195 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
222 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
223 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
261 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2() local
268 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
340 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
367 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
368 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc2()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c124 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
141 const v128_t vi5x8ACE = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
142 const v128_t vi5x9BDF = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
226 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
241 const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
242 const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
D3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c124 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
141 const v128_t vi5x8ACE = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
142 const v128_t vi5x9BDF = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
226 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
241 const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
242 const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
D3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c114 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
131 const v128_t vi5x8ACE = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
132 const v128_t vi5x9BDF = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
216 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
231 const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
232 const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
D5x5p2-minmax-wasmsimd-x86-loadsplat-2x4-acc3.c123 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3() local
195 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
222 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
223 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
263 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3() local
270 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
342 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
369 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
370 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3()
D5x5p2-minmax-wasmsimd-arm-splat-2x4.c97 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4() local
169 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
196 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
197 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
233 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4() local
240 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
312 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
339 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
340 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
D5x5p2-minmax-wasmsimd-x86-splat-2x4-acc3.c97 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3() local
169 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
196 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
197 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
237 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3() local
244 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
316 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
343 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
344 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
D5x5p2-minmax-wasmsimd-x86-splat-2x4-acc2.c97 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2() local
169 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
196 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
197 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
235 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2() local
242 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
314 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
341 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
342 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
D5x5p2-minmax-wasmsimd-arm-splat-2x4-acc3.c97 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3() local
169 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
196 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
197 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
237 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3() local
244 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
316 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
343 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
344 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
D5x5p2-minmax-wasmsimd-x86-splat-2x4.c97 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4() local
169 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
196 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
197 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
233 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4() local
240 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
312 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
339 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
340 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
D5x5p2-minmax-wasmsimd-arm-splat-2x4-acc2.c97 const v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2() local
169 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
196 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
197 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
235 v128_t vi5x89AB = wasm_v128_load(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2() local
242 vi5x89AB = wasm_v128_and(vmask, vi5x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
314 const v128_t vi5x5678 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
341 const v128_t vi5x6789 = wasm_v32x4_shuffle(vi5x4567, vi5x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
342 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
D3x3s2p1-minmax-wasmsimd-x86-splat-3x4.c114 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
131 const v128_t vi5x8ACE = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
132 const v128_t vi5x9BDF = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
216 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
231 const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
232 const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
D3x3s2p1-minmax-sse-3x4.c114 const __m128 vi5x89AB = _mm_loadu_ps(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4() local
131 const __m128 vi5x8ACE = _mm_shuffle_ps(vi5x89AB, vi5xCDEF, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
132 const __m128 vi5x9BDF = _mm_shuffle_ps(vi5x89AB, vi5xCDEF, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
219 const __m128 vi5x89AB = _mm_loadu_ps(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4() local
234 const __m128 vi5x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi5x89AB, vi5xCDEF, _MM_SHUFFLE(2, 0, 2, 0))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
235 const __m128 vi5x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi5x89AB, vi5xCDEF, _MM_SHUFFLE(3, 1, 3, 1))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
D3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c137 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
160 const v128_t vi5x8ACE = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
161 const v128_t vi5x9BDF = wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
266 const v128_t vi5x89AB = wasm_v128_load(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
285 const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, 6)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
286 const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, 7)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
D5x5p2-minmax-neonfma-3x4.c103 const float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
194 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
227 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
228 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
276 float32x4_t vi5x89AB = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
284 vi5x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi5x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
375 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
408 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
409 vi5x4567 = vi5x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()

12345