
Searched refs:vi6x89AB (Results 1 – 25 of 88) sorted by relevance


/external/XNNPACK/src/f32-dwconv2d-chw/gen/
3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c
127 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
143 const v128_t vi6x8ACE = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
144 const v128_t vi6x9BDF = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
228 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
243 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
244 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
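These hits all follow XNNPACK's register-naming convention: vi6x89AB is a vector of input row 6 at hex column offsets 8 through B. In the stride-2 (3x3s2p1) kernels, a pair of such registers is deinterleaved into even and odd columns before the filter taps are applied. A minimal standalone sketch of that shuffle, assuming WASM SIMD128; the helper name is hypothetical, not XNNPACK code:

#include <wasm_simd128.h>

/* Sketch only: split eight consecutive floats of row pointer i6 into
   even-indexed (8,A,C,E) and odd-indexed (9,B,D,F) column lanes.
   Shuffle indices 0-3 select lanes of the first operand, 4-7 of the second. */
static void deinterleave_even_odd(const float* i6, v128_t* veven, v128_t* vodd) {
  const v128_t vi6x89AB = wasm_v128_load(i6);
  const v128_t vi6xCDEF = wasm_v128_load(i6 + 4);
  *veven = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, 6);
  *vodd  = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, 7);
}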
3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c
127 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
143 const v128_t vi6x8ACE = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
144 const v128_t vi6x9BDF = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
228 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
243 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
244 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c
117 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
133 const v128_t vi6x8ACE = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
134 const v128_t vi6x9BDF = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
218 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
233 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
234 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
3x3s2p1-minmax-wasmsimd-x86-splat-3x4.c
117 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
133 const v128_t vi6x8ACE = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
134 const v128_t vi6x9BDF = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
218 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
233 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
234 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
3x3s2p1-minmax-sse-3x4.c
117 const __m128 vi6x89AB = _mm_loadu_ps(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4() local
133 const __m128 vi6x8ACE = _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
134 const __m128 vi6x9BDF = _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
221 const __m128 vi6x89AB = _mm_loadu_ps(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4() local
236 …const __m128 vi6x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
237 …const __m128 vi6x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
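The SSE hits express the same even/odd split with _mm_shuffle_ps, and in the remainder rows (lines 236-237 above) the shuffled vectors are additionally ANDed with vmask_even/vmask_odd to zero columns past the end of the row. A hedged equivalent of the shuffle, again with a hypothetical helper name:

#include <xmmintrin.h>

/* Sketch only: _MM_SHUFFLE lists lane selectors high-to-low, so
   _MM_SHUFFLE(2, 0, 2, 0) takes lanes 0 and 2 of each operand in order,
   yielding columns 8,A,C,E; _MM_SHUFFLE(3, 1, 3, 1) yields 9,B,D,F. */
static void deinterleave_even_odd_sse(const float* i6, __m128* veven, __m128* vodd) {
  const __m128 vi6x89AB = _mm_loadu_ps(i6);
  const __m128 vi6xCDEF = _mm_loadu_ps(i6 + 4);
  *veven = _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
  *vodd  = _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(3, 1, 3, 1));
}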
3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c
140 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
162 const v128_t vi6x8ACE = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
163 const v128_t vi6x9BDF = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
268 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
287 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
288 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
5x5p2-minmax-neonfma-3x4.c
104 const float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
195 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
229 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
230 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
277 float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
285 vi6x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi6x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
376 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
410 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
411 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
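The unit-stride 5x5p2 kernels use vi6x89AB differently: it holds the next four columns so vextq_f32 can slide a window along the row. vextq_f32(a, b, n) extracts four lanes starting at lane n of the concatenation of a and b, so the +1 and +2 column offsets come out of two registers without reloading, and the final assignment (vi6x4567 = vi6x89AB) rotates the registers for the next column block. A minimal sketch under those assumptions; the helper is hypothetical:

#include <arm_neon.h>

/* Sketch only: build the shifted windows for columns 5..8 and 6..9 from
   the registers holding columns 4..7 and 8..B. */
static void shifted_windows(float32x4_t vi6x4567, float32x4_t vi6x89AB,
                            float32x4_t* vi6x5678, float32x4_t* vi6x6789) {
  *vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1);  /* columns 5,6,7,8 */
  *vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2);  /* columns 6,7,8,9 */
}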
5x5p2-minmax-wasmsimd-arm-loadsplat-3x4.c
133 const v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
224 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
258 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
259 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
305 v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
313 vi6x89AB = wasm_v128_and(vmask, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
404 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
438 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
439 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
5x5p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c
133 const v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
224 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
258 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
259 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
308 v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
316 vi6x89AB = wasm_v128_and(vmask, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
407 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
441 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
442 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
5x5p2-minmax-wasmsimd-x86-loadsplat-3x4.c
133 const v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
224 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
258 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
259 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
305 v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
313 vi6x89AB = wasm_v128_and(vmask, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
404 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
438 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
439 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
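In the second loop of each 5x5p2 kernel (e.g. line 313 above), the freshly loaded vi6x89AB is ANDed with vmask so lanes past the row's right edge become zero and drop out of the convolution sums. A sketch of how such a mask could be built and applied, assuming (as the full loads above imply) that reading 16 bytes at i6 is safe; the helper name and width parameter w are illustrative:

#include <wasm_simd128.h>

/* Sketch only: lane i of vmask is all-ones iff i < w, so wasm_v128_and
   keeps the first w columns and clears the rest. */
static v128_t masked_load_tail(const float* i6, int w) {
  const v128_t vmask = wasm_i32x4_gt(wasm_i32x4_splat(w),
                                     wasm_i32x4_const(0, 1, 2, 3));
  return wasm_v128_and(vmask, wasm_v128_load(i6));
}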
5x5p2-minmax-neon-3x4-acc2.c
104 const float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
195 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
229 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
230 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
280 float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
288 vi6x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi6x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
379 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
413 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
414 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
5x5p2-minmax-neon-3x4.c
104 const float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
195 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
229 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
230 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
277 float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
285 vi6x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi6x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
376 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
410 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
411 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
5x5p2-minmax-neonfma-3x4-acc2.c
104 const float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
195 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
229 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
230 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
280 float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
288 vi6x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi6x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
379 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
413 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
414 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
3x3s2p1-minmax-wasmsimd-x86-loadsplat-4x4.c
140 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
162 const v128_t vi6x8ACE = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
163 const v128_t vi6x9BDF = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
268 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
287 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
288 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
3x3s2p1-minmax-wasmsimd-arm-splat-4x4.c
130 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
152 const v128_t vi6x8ACE = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
153 const v128_t vi6x9BDF = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
258 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
277 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
278 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
3x3s2p1-minmax-wasmsimd-x86-splat-4x4.c
130 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
152 const v128_t vi6x8ACE = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
153 const v128_t vi6x9BDF = wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
258 const v128_t vi6x89AB = wasm_v128_load(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
277 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
278 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
3x3s2p1-minmax-sse-4x4.c
129 const __m128 vi6x89AB = _mm_loadu_ps(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4() local
151 const __m128 vi6x8ACE = _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
152 const __m128 vi6x9BDF = _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
262 const __m128 vi6x89AB = _mm_loadu_ps(i6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4() local
281 …const __m128 vi6x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
282 …const __m128 vi6x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
5x5p2-minmax-wasmsimd-x86-loadsplat-3x4-acc2.c
133 const v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2() local
224 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
258 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
259 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
308 v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2() local
316 vi6x89AB = wasm_v128_and(vmask, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
407 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
441 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
442 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4_acc2()
5x5p2-minmax-neon-4x4.c
113 const float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
223 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
263 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
264 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
322 float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
331 vi6x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi6x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
441 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
481 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
482 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
5x5p2-minmax-wasmsimd-x86-splat-3x4-acc2.c
107 const v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2() local
198 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
232 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
233 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
282 v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2() local
290 vi6x89AB = wasm_v128_and(vmask, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
381 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
415 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
416 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
5x5p2-minmax-wasmsimd-arm-splat-3x4.c
107 const v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4() local
198 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
232 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
233 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
279 v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4() local
287 vi6x89AB = wasm_v128_and(vmask, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
378 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
412 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
413 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
5x5p2-minmax-neon-4x4-acc2.c
113 const float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
223 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
263 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
264 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
326 float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
335 vi6x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi6x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
445 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
485 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
486 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
5x5p2-minmax-wasmsimd-arm-splat-3x4-acc2.c
107 const v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2() local
198 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
232 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
233 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
282 v128_t vi6x89AB = wasm_v128_load(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2() local
290 vi6x89AB = wasm_v128_and(vmask, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
381 const v128_t vi6x5678 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
415 const v128_t vi6x6789 = wasm_v32x4_shuffle(vi6x4567, vi6x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
416 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
5x5p2-minmax-neonfma-4x4.c
113 const float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
223 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
263 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
264 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
322 float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
331 vi6x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi6x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
441 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
481 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
482 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
5x5p2-minmax-neonfma-4x4-acc2.c
113 const float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
223 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
263 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
264 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
326 float32x4_t vi6x89AB = vld1q_f32(i6); i6 += 4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
335 vi6x89AB = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi6x89AB))); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
445 const float32x4_t vi6x5678 = vextq_f32(vi6x4567, vi6x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
485 const float32x4_t vi6x6789 = vextq_f32(vi6x4567, vi6x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
486 vi6x4567 = vi6x89AB; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()