
Searched refs:vi3xCDEF (Results 1 – 25 of 75) sorted by relevance


/external/XNNPACK/src/f32-dwconv2d-chw/gen/
3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4.c
106 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() local
118 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
119 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
183 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() local
193 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
194 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c
106 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2() local
118 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2()
119 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2()
185 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2() local
195 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2()
196 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2()
3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c
106 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2() local
118 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2()
119 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2()
185 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2() local
195 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2()
196 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2()
3x3s2p1-minmax-wasmsimd-arm-splat-2x4-acc2.c
96 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2() local
108 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2()
109 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2()
175 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2() local
185 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2()
186 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2()
3x3s2p1-minmax-wasmsimd-arm-splat-2x4.c
96 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4() local
108 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4()
109 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4()
173 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4() local
183 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4()
184 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4()
3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4.c
106 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4() local
118 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4()
119 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4()
183 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4() local
193 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4()
194 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4()
3x3s2p1-minmax-sse-2x4-acc2.c
97 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2() local
109 const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2()
110 const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2()
177 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2() local
187 …const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2()
188 …const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2()
3x3s2p1-minmax-sse-2x4.c
97 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4() local
109 const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4()
110 const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4()
175 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4() local
185 …const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4()
186 …const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4()
3x3s2p1-minmax-wasmsimd-x86-splat-2x4.c
96 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4() local
108 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4()
109 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4()
173 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4() local
183 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4()
184 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4()
3x3s2p1-minmax-wasmsimd-x86-splat-2x4-acc2.c
96 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2() local
108 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2()
109 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2()
175 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2() local
185 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2()
186 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2()
3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c
119 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
137 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
138 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
223 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
237 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
238 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c
119 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
137 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
138 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
223 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
237 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
238 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c
109 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
127 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
128 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
213 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
227 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
228 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
3x3s2p1-minmax-wasmsimd-x86-splat-3x4.c
109 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
127 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
128 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
213 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
227 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
228 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
3x3s2p1-minmax-sse-3x4.c
109 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4() local
127 const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
128 const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
216 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4() local
230 …const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
231 …const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c
132 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
156 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
157 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
263 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
281 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
282 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
3x3s2p1-minmax-wasmsimd-x86-loadsplat-4x4.c
132 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
156 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
157 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
263 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
281 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
282 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
3x3s2p1-minmax-wasmsimd-arm-splat-4x4.c
122 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
146 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
147 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
253 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
271 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
272 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
3x3s2p1-minmax-wasmsimd-x86-splat-4x4.c
122 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
146 const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
147 const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
253 const v128_t vi3xCDEF = wasm_v128_load(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
271 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
272 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
3x3s2p1-minmax-sse-4x4.c
121 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4() local
145 const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
146 const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
257 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4() local
275 …const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
276 …const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
3x3s2p1-minmax-sse-5x4.c
133 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4() local
163 const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4()
164 const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4()
298 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4() local
320 …const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4()
321 …const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4()
3x3s2p1-minmax-sse-6x4.c
145 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4() local
181 const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
182 const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
339 const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4() local
365 …const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
366 …const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
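All of the 3x3s2p1 matches above share one pattern: two adjacent 4-float loads (vi3x89AB and vi3xCDEF) are deinterleaved into even-indexed pixels (8 A C E) and odd-indexed pixels (9 B D F), which a 3x3 stride-2 depthwise convolution taps separately; the masked (vmask_even/vmask_odd) variants near each file's second match additionally zero the lanes that fall past the end of the row. Below is a minimal standalone sketch of just the deinterleave step, using the same SSE intrinsics as the sse-*x4 files; the driver code around the two shuffles is illustrative, not XNNPACK's.

/* Sketch only, not XNNPACK code: deinterleave eight consecutive row
   elements (labeled 8..F, as in the kernel variable names) into the
   even- and odd-indexed halves used by a stride-2 kernel. */
#include <stdio.h>
#include <xmmintrin.h>  /* SSE */

int main(void) {
  const float i3[8] = {8, 9, 10, 11, 12, 13, 14, 15};
  const __m128 vi3x89AB = _mm_loadu_ps(i3);      /* lanes 8 9 A B */
  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  /* lanes C D E F */
  /* _MM_SHUFFLE(2, 0, 2, 0) takes lanes 0 and 2 of each operand: 8 A C E. */
  const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
  /* _MM_SHUFFLE(3, 1, 3, 1) takes lanes 1 and 3 of each operand: 9 B D F. */
  const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1));
  float even[4], odd[4];
  _mm_storeu_ps(even, vi3x8ACE);
  _mm_storeu_ps(odd, vi3x9BDF);
  printf("even: %g %g %g %g\n", even[0], even[1], even[2], even[3]);
  printf("odd:  %g %g %g %g\n", odd[0], odd[1], odd[2], odd[3]);
  return 0;
}

The wasmsimd kernels express the same pair of shuffles as wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6) and wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7), where indices 4-7 address the second operand's lanes.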
/external/XNNPACK/src/f32-prelu/gen/
wasmsimd-minmax-4x16.c
84 v128_t vi3xCDEF = wasm_v128_load(i3 + 12); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local
117 v128_t vacc3xCDEF = wasm_i32x4_max(vi3xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
118 vi3xCDEF = wasm_i32x4_min(vi3xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
135 vacc3xCDEF = wasm_f32x4_add(vacc3xCDEF, wasm_f32x4_mul(vi3xCDEF, vwCDEF)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
wasmsimd-bitselect-4x16.c
84 const v128_t vi3xCDEF = wasm_v128_load(i3 + 12); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local
117 v128_t vacc3xCDEF = wasm_f32x4_mul(vi3xCDEF, vwCDEF); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
118 const v128_t vmask3xCDEF = wasm_i32x4_lt(vi3xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
135 vacc3xCDEF = wasm_v128_bitselect(vacc3xCDEF, vi3xCDEF, vmask3xCDEF); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
neon-4x16.c
79 const float32x4_t vi3xCDEF = vld1q_f32(i3); i3 += 4; in xnn_f32_prelu_ukernel__neon_4x16() local
111 float32x4_t vacc3xCDEF = vmulq_f32(vi3xCDEF, vwCDEF); in xnn_f32_prelu_ukernel__neon_4x16()
112 const uint32x4_t vm3xCDEF = vcltq_s32(vreinterpretq_s32_f32(vi3xCDEF), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x16()
129 vacc3xCDEF = vbslq_f32(vm3xCDEF, vacc3xCDEF, vi3xCDEF); in xnn_f32_prelu_ukernel__neon_4x16()
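The f32-prelu matches show two ways of vectorizing prelu(x) = x > 0 ? x : w*x. The minmax variant splits x into its positive and negative parts and recombines them arithmetically; the bitselect and neon variants multiply unconditionally and then select per lane on the sign of x. A minimal scalar sketch of both formulations follows; the function names are hypothetical, not XNNPACK API.

/* Sketch only, not XNNPACK code: scalar versions of the two PReLU
   formulations that the vector kernels above apply per lane. */
#include <math.h>

/* minmax formulation: prelu(x) = max(x, 0) + w * min(x, 0). */
static inline float prelu_minmax(float x, float w) {
  const float pos = fmaxf(x, 0.0f);  /* wasm_i32x4_max(vi3xCDEF, vzero) */
  const float neg = fminf(x, 0.0f);  /* wasm_i32x4_min(vi3xCDEF, vzero) */
  return pos + neg * w;              /* wasm_f32x4_add(vacc, wasm_f32x4_mul(vi, vw)) */
}

/* bitselect formulation: compute w*x for every lane, keep it only where x < 0. */
static inline float prelu_bitselect(float x, float w) {
  const float acc = x * w;  /* wasm_f32x4_mul(vi3xCDEF, vwCDEF) / vmulq_f32 */
  return x < 0.0f ? acc : x;  /* wasm_v128_bitselect(vacc, vi, vmask) / vbslq_f32 */
}

Both return x unchanged for non-negative inputs and w*x for negative ones; the vector versions simply evaluate all lanes and blend, avoiding a branch. Note the wasm minmax kernel performs its min/max with integer compares on the float bit patterns (wasm_i32x4_max/min), which agrees with fmaxf/fminf against zero for non-NaN inputs because IEEE-754 floats with the sign bit set compare as negative integers.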
