/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4.c |
  106  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() local
  118  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
  119  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
  183  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() local
  193  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
  194  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c |
  106  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2() local
  118  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2()
  119  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2()
  185  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2() local
  195  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2()
  196  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c |
  106  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2() local
  118  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2()
  119  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2()
  185  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2() local
  195  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2()
  196  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-2x4-acc2.c |
  96   const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2() local
  108  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2()
  109  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2()
  175  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2() local
  185  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2()
  186  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-2x4.c |
  96   const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4() local
  108  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4()
  109  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4()
  173  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4() local
  183  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4()
  184  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_2x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4.c |
  106  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4() local
  118  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4()
  119  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4()
  183  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4() local
  193  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4()
  194  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_2x4()
|
D | 3x3s2p1-minmax-sse-2x4-acc2.c |
  97   const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2() local
  109  const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2()
  110  const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2()
  177  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2() local
  187  const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2()
  188  const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4_acc2()
|
D | 3x3s2p1-minmax-sse-2x4.c |
  97   const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4() local
  109  const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4()
  110  const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4()
  175  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4() local
  185  const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4()
  186  const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_2x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-2x4.c |
  96   const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4() local
  108  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4()
  109  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4()
  173  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4() local
  183  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4()
  184  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-2x4-acc2.c |
  96   const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2() local
  108  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2()
  109  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2()
  175  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2() local
  185  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2()
  186  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_2x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c |
  119  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
  137  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
  138  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
  223  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
  237  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
  238  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c |
  119  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
  137  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
  138  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
  223  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
  237  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
  238  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c |
  109  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
  127  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
  128  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
  213  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
  227  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
  228  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-3x4.c |
  109  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
  127  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
  128  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
  213  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
  227  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
  228  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
|
D | 3x3s2p1-minmax-sse-3x4.c |
  109  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4() local
  127  const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
  128  const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
  216  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4() local
  230  const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
  231  const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_3x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c |
  132  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
  156  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
  157  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
  263  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
  281  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
  282  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-4x4.c |
  132  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
  156  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
  157  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
  263  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
  281  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
  282  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-4x4.c |
  122  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
  146  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
  147  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
  253  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
  271  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
  272  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-4x4.c |
  122  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
  146  const v128_t vi3x8ACE = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
  147  const v128_t vi3x9BDF = wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
  253  const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
  271  const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
  272  const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
|
D | 3x3s2p1-minmax-sse-4x4.c |
  121  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4() local
  145  const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
  146  const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
  257  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4() local
  275  const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
  276  const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
|
D | 3x3s2p1-minmax-sse-5x4.c |
  133  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4() local
  163  const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4()
  164  const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4()
  298  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4() local
  320  const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4()
  321  const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_5x4()
|
D | 3x3s2p1-minmax-sse-6x4.c |
  145  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4() local
  181  const __m128 vi3x8ACE = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
  182  const __m128 vi3x9BDF = _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
  339  const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4() local
  365  const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
  366  const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));  in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
|
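Every 3x3s2p1 usage above is the same stride-2 column deinterleave: two adjacent 4-float loads (vi3x89AB, vi3xCDEF) are split into even-indexed lanes (8,A,C,E) and odd-indexed lanes (9,B,D,F) by a shuffle, and in the remainder path the out-of-range columns are zeroed through precomputed even/odd masks. A minimal sketch of the pattern in both intrinsic sets; the helper names are hypothetical, only the variable naming and the shuffle constants follow the kernels indexed above:

    #if defined(__wasm_simd128__)
    #include <wasm_simd128.h>

    /* WAsm SIMD: lane indices 0-3 select from the first operand, 4-7 from the
       second, so (0,2,4,6) gathers the even columns and (1,3,5,7) the odd ones.
       The ANDs zero masked-out remainder columns, as in the kernels' tail path. */
    static void deinterleave_s2(const float* i3, v128_t vmask_even, v128_t vmask_odd,
                                v128_t* v8ACE, v128_t* v9BDF) {
      const v128_t vi3x89AB = wasm_v128_load(i3);
      const v128_t vi3xCDEF = wasm_v128_load(i3 + 4);
      *v8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, 6));
      *v9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, 7));
    }
    #elif defined(__SSE__)
    #include <xmmintrin.h>

    /* SSE: _mm_shuffle_ps takes its low two result lanes from the first operand
       and its high two from the second; _MM_SHUFFLE(2, 0, 2, 0) therefore picks
       lanes 0 and 2 of each source (even columns), (3, 1, 3, 1) lanes 1 and 3. */
    static void deinterleave_s2(const float* i3, __m128 vmask_even, __m128 vmask_odd,
                                __m128* v8ACE, __m128* v9BDF) {
      const __m128 vi3x89AB = _mm_loadu_ps(i3);
      const __m128 vi3xCDEF = _mm_loadu_ps(i3 + 4);
      *v8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0, 2, 0)));
      *v9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1, 3, 1)));
    }
    #endif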
/external/XNNPACK/src/f32-prelu/gen/ |
D | wasmsimd-minmax-4x16.c |
  84   v128_t vi3xCDEF = wasm_v128_load(i3 + 12);  in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local
  117  v128_t vacc3xCDEF = wasm_i32x4_max(vi3xCDEF, vzero);  in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
  118  vi3xCDEF = wasm_i32x4_min(vi3xCDEF, vzero);  in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
  135  vacc3xCDEF = wasm_f32x4_add(vacc3xCDEF, wasm_f32x4_mul(vi3xCDEF, vwCDEF));  in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
|
D | wasmsimd-bitselect-4x16.c |
  84   const v128_t vi3xCDEF = wasm_v128_load(i3 + 12);  in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local
  117  v128_t vacc3xCDEF = wasm_f32x4_mul(vi3xCDEF, vwCDEF);  in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
  118  const v128_t vmask3xCDEF = wasm_i32x4_lt(vi3xCDEF, vzero);  in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
  135  vacc3xCDEF = wasm_v128_bitselect(vacc3xCDEF, vi3xCDEF, vmask3xCDEF);  in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
|
D | neon-4x16.c |
  79   const float32x4_t vi3xCDEF = vld1q_f32(i3); i3 += 4;  in xnn_f32_prelu_ukernel__neon_4x16() local
  111  float32x4_t vacc3xCDEF = vmulq_f32(vi3xCDEF, vwCDEF);  in xnn_f32_prelu_ukernel__neon_4x16()
  112  const uint32x4_t vm3xCDEF = vcltq_s32(vreinterpretq_s32_f32(vi3xCDEF), vmovq_n_s32(0));  in xnn_f32_prelu_ukernel__neon_4x16()
  129  vacc3xCDEF = vbslq_f32(vm3xCDEF, vacc3xCDEF, vi3xCDEF);  in xnn_f32_prelu_ukernel__neon_4x16()
|
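The three f32-prelu kernels above realize the same per-lane PReLU (x >= 0 ? x : x*w) in two ways: the minmax variant splits the input into its non-negative part (integer max against zero) and non-positive part (integer min against zero) and scales only the latter, while the bitselect and NEON variants compute x*w for every lane and keep it only where the sign compare fires. A sketch of both strategies in WAsm SIMD intrinsics (hypothetical helper names; the integer min/max trick works because the IEEE-754 sign bit makes negative floats compare as negative int32 values):

    #include <wasm_simd128.h>

    /* minmax strategy: vacc keeps the positive lanes (negative lanes become 0),
       vneg keeps the negative lanes; only vneg is scaled by the slope vw. */
    static v128_t prelu_minmax(v128_t vx, v128_t vw) {
      const v128_t vzero = wasm_i32x4_splat(0);
      const v128_t vacc = wasm_i32x4_max(vx, vzero);  /* x for x >= 0, else 0 */
      const v128_t vneg = wasm_i32x4_min(vx, vzero);  /* x for x <  0, else 0 */
      return wasm_f32x4_add(vacc, wasm_f32x4_mul(vneg, vw));
    }

    /* bitselect strategy: compute x*w unconditionally, then select it per lane
       where x < 0 (the neon-4x16.c kernel does the same with vbslq_f32). */
    static v128_t prelu_bitselect(v128_t vx, v128_t vw) {
      const v128_t vacc = wasm_f32x4_mul(vx, vw);
      const v128_t vmask = wasm_i32x4_lt(vx, wasm_i32x4_splat(0));
      return wasm_v128_bitselect(vacc, vx, vmask);
    }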