
Searched refs:vi4xCDEF (results 1–25 of 72, sorted by relevance)


All results below are in /external/XNNPACK/src/f32-dwconv2d-chw/gen/. Each entry names the enclosing function once, then lists the matching source lines; [local] marks a match that is the local definition of vi4xCDEF.

3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4)
  109:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  120:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  121:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  185:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  195:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  196:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2)
  109:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  120:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  121:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  187:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  197:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  198:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2)
  109:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  120:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  121:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  187:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  197:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  198:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-arm-splat-2x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2)
  99:   const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  110:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  111:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  177:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  187:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  188:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-arm-splat-2x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4)
  99:   const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  110:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  111:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  175:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  185:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  186:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4)
  109:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  120:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  121:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  185:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  195:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  196:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-sse-2x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2)
  100:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  111:  const __m128 vi4x8ACE = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
  112:  const __m128 vi4x9BDF = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1));
  179:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  189:  const __m128 vi4x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));
  190:  const __m128 vi4x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));

3x3s2p1-minmax-sse-2x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4)
  100:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  111:  const __m128 vi4x8ACE = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
  112:  const __m128 vi4x9BDF = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1));
  177:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  187:  const __m128 vi4x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));
  188:  const __m128 vi4x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));

3x3s2p1-minmax-wasmsimd-x86-splat-2x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4)
  99:   const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  110:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  111:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  175:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  185:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  186:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-x86-splat-2x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2)
  99:   const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  110:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  111:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  177:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  187:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  188:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4)
  122:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  139:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  140:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  225:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  239:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  240:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4)
  122:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  139:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  140:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  225:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  239:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  240:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4)
  112:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  129:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  130:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  215:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  229:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  230:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-x86-splat-3x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4)
  112:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  129:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  130:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  215:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  229:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  230:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-sse-3x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4)
  112:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  129:  const __m128 vi4x8ACE = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
  130:  const __m128 vi4x9BDF = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1));
  218:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  232:  const __m128 vi4x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));
  233:  const __m128 vi4x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));

3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4)
  135:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  158:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  159:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  265:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  283:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  284:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-x86-loadsplat-4x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4)
  135:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  158:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  159:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  265:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  283:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  284:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-arm-splat-4x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4)
  125:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  148:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  149:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  255:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  273:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  274:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-wasmsimd-x86-splat-4x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4)
  125:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  148:  const v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  149:  const v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
  255:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  273:  const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6));
  274:  const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7));

3x3s2p1-minmax-sse-4x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4)
  124:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  147:  const __m128 vi4x8ACE = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
  148:  const __m128 vi4x9BDF = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1));
  259:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  277:  const __m128 vi4x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));
  278:  const __m128 vi4x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));

3x3s2p1-minmax-sse-5x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4)
  136:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  165:  const __m128 vi4x8ACE = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
  166:  const __m128 vi4x9BDF = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1));
  300:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  322:  const __m128 vi4x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));
  323:  const __m128 vi4x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));

3x3s2p1-minmax-sse-6x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4)
  148:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  183:  const __m128 vi4x8ACE = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
  184:  const __m128 vi4x9BDF = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1));
  341:  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  [local]
  367:  const __m128 vi4x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));
  368:  const __m128 vi4x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));

5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_1x4_acc3)
  120:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  131:  v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  132:  v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);

5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_1x4_acc2)
  120:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  131:  v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  132:  v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);

5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_1x4)
  120:  const v128_t vi4xCDEF = wasm_v128_load(i4 + 4);  [local]
  131:  v128_t vi4x8ACE = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, 6);
  132:  v128_t vi4x9BDF = wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, 7);
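
Every hit above is the same idiom: XNNPACK's stride-2 ("s2") depthwise-convolution microkernels load eight consecutive floats from the input-row pointer i4 as two 128-bit vectors (lanes 8..B and C..F in the kernels' hex lane naming), then deinterleave them into even lanes (vi4x8ACE) and odd lanes (vi4x9BDF), which the stride-2 filter taps consume separately. WASM SIMD spells the deinterleave as wasm_v32x4_shuffle with explicit lane indices; SSE spells it as _mm_shuffle_ps with _MM_SHUFFLE selectors. The second cluster of matches in each file is the row-tail path, where precomputed vmask_even/vmask_odd masks zero the lanes read past the end of the row so they contribute nothing to the accumulators. Below is a minimal standalone sketch of the idiom with SSE intrinsics and made-up input data; it mirrors the variable names from the hits but is an illustration, not code from these files.

/* Minimal sketch of the even/odd deinterleave seen in the matches above.
   The input data and the main() harness are invented for illustration. */
#include <stdio.h>
#include <emmintrin.h>  /* SSE2, for _mm_setr_epi32 / _mm_castsi128_ps */

int main(void) {
  const float row[8] = {8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f};
  const float* i4 = row;  /* stands in for the kernels' input-row pointer */

  const __m128 vi4x89AB = _mm_loadu_ps(i4);      /* lanes 8, 9, A, B */
  const __m128 vi4xCDEF = _mm_loadu_ps(i4 + 4);  /* lanes C, D, E, F */

  /* _MM_SHUFFLE(2, 0, 2, 0) selects a[0], a[2], b[0], b[2]: the even lanes.
     _MM_SHUFFLE(3, 1, 3, 1) selects a[1], a[3], b[1], b[3]: the odd lanes. */
  const __m128 vi4x8ACE = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
  const __m128 vi4x9BDF = _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1, 3, 1));

  /* Row-tail variant: an all-ones/all-zeros mask zeroes lanes loaded past the
     end of the row. Here we pretend only 3 of the 4 even lanes are valid. */
  const __m128 vmask_even = _mm_castsi128_ps(_mm_setr_epi32(-1, -1, -1, 0));
  const __m128 vtail_8ACE = _mm_and_ps(vmask_even, vi4x8ACE);

  float even[4], odd[4], tail[4];
  _mm_storeu_ps(even, vi4x8ACE);
  _mm_storeu_ps(odd, vi4x9BDF);
  _mm_storeu_ps(tail, vtail_8ACE);
  printf("even: %g %g %g %g\n", even[0], even[1], even[2], even[3]);  /* 8 10 12 14 */
  printf("odd:  %g %g %g %g\n", odd[0], odd[1], odd[2], odd[3]);      /* 9 11 13 15 */
  printf("tail: %g %g %g %g\n", tail[0], tail[1], tail[2], tail[3]);  /* 8 10 12 0 */
  return 0;
}

Two contiguous loads plus two shuffles is a common way to get stride-2 data into SIMD registers on ISAs without a strided-load instruction; the WASM SIMD files differ only in spelling (wasm_v128_load, wasm_v32x4_shuffle, wasm_v128_and).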
