Searched refs:__m128 (Results 1 – 25 of 939) sorted by relevance

/external/llvm-project/clang/test/CodeGen/X86/
sse-builtins.c
9 __m128 test_mm_add_ps(__m128 A, __m128 B) { in test_mm_add_ps()
15 __m128 test_mm_add_ss(__m128 A, __m128 B) { in test_mm_add_ss()
24 __m128 test_mm_and_ps(__m128 A, __m128 B) { in test_mm_and_ps()
30 __m128 test_mm_andnot_ps(__m128 A, __m128 B) { in test_mm_andnot_ps()
37 __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) { in test_mm_cmpeq_ps()
46 __m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) { in test_mm_cmpeq_ss()
52 __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) { in test_mm_cmpge_ps()
61 __m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) { in test_mm_cmpge_ss()
68 __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) { in test_mm_cmpgt_ps()
77 __m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) { in test_mm_cmpgt_ss()
[all …]
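
Note: the hits above come from Clang's CodeGen regression tests, where each function wraps a single intrinsic and FileCheck verifies the IR it lowers to. A minimal sketch of that test style, assuming the usual -emit-llvm + FileCheck setup (not copied verbatim from the file; the CHECK line reflects that Clang lowers _mm_add_ps to a plain vector fadd rather than a target builtin):

```c
#include <xmmintrin.h>

// Sketch of the sse-builtins.c test pattern (assumed, not verbatim).
__m128 test_mm_add_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ps
  // CHECK: fadd <4 x float> %{{.*}}, %{{.*}}
  return _mm_add_ps(A, B);
}
```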
sse-builtins-constrained-cmp.c
6 __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) { in test_mm_cmpeq_ps()
15 __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) { in test_mm_cmpge_ps()
24 __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) { in test_mm_cmpgt_ps()
33 __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) { in test_mm_cmple_ps()
42 __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) { in test_mm_cmplt_ps()
51 __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) { in test_mm_cmpneq_ps()
60 __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) { in test_mm_cmpnge_ps()
69 __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) { in test_mm_cmpngt_ps()
78 __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) { in test_mm_cmpnle_ps()
87 __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) { in test_mm_cmpnlt_ps()
[all …]
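
Note: all of the cmp*_ps intrinsics under test return per-lane masks (all-ones bits for true, all-zeros for false) rather than booleans. A minimal sketch of the usual select idiom built on such a mask; the helper names below are illustrative, not from the test file:

```c
#include <xmmintrin.h>

// (mask & a) | (~mask & b): picks lanes of a where the compare was true.
static __m128 select_ps(__m128 mask, __m128 a, __m128 b) {
  return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b));
}

// Example: zero out the negative lanes of v.
__m128 clamp_negatives_to_zero(__m128 v) {
  __m128 mask = _mm_cmpge_ps(v, _mm_setzero_ps());  // all-ones where v >= 0
  return select_ps(mask, v, _mm_setzero_ps());
}
```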
/external/clang/test/CodeGen/
sse-builtins.c
10 __m128 test_mm_add_ps(__m128 A, __m128 B) { in test_mm_add_ps()
16 __m128 test_mm_add_ss(__m128 A, __m128 B) { in test_mm_add_ss()
25 __m128 test_mm_and_ps(__m128 A, __m128 B) { in test_mm_and_ps()
31 __m128 test_mm_andnot_ps(__m128 A, __m128 B) { in test_mm_andnot_ps()
38 __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) { in test_mm_cmpeq_ps()
47 __m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) { in test_mm_cmpeq_ss()
53 __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) { in test_mm_cmpge_ps()
62 __m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) { in test_mm_cmpge_ss()
69 __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) { in test_mm_cmpgt_ps()
78 __m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) { in test_mm_cmpgt_ss()
[all …]
/external/oboe/samples/RhythmGame/third_party/glm/simd/
matrix.h
36 __m128 v0 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); in glm_mat4_mul_vec4()
37 __m128 v1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); in glm_mat4_mul_vec4()
38 __m128 v2 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); in glm_mat4_mul_vec4()
39 __m128 v3 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3)); in glm_mat4_mul_vec4()
41 __m128 m0 = _mm_mul_ps(m[0], v0); in glm_mat4_mul_vec4()
42 __m128 m1 = _mm_mul_ps(m[1], v1); in glm_mat4_mul_vec4()
43 __m128 m2 = _mm_mul_ps(m[2], v2); in glm_mat4_mul_vec4()
44 __m128 m3 = _mm_mul_ps(m[3], v3); in glm_mat4_mul_vec4()
46 __m128 a0 = _mm_add_ps(m0, m1); in glm_mat4_mul_vec4()
47 __m128 a1 = _mm_add_ps(m2, m3); in glm_mat4_mul_vec4()
[all …]
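
Note: these hits are the classic broadcast-multiply-add form of a column-major mat4 * vec4 product: splat each vector component across a register, scale the matching matrix column, and sum the four partial products. A self-contained sketch of the whole pattern; the final adds are assumed from the pattern, since the listing is truncated before them:

```c
#include <xmmintrin.h>

__m128 mat4_mul_vec4(const __m128 m[4], __m128 v) {
  __m128 v0 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));  // splat v.x
  __m128 v1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));  // splat v.y
  __m128 v2 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));  // splat v.z
  __m128 v3 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));  // splat v.w

  __m128 a0 = _mm_add_ps(_mm_mul_ps(m[0], v0), _mm_mul_ps(m[1], v1));
  __m128 a1 = _mm_add_ps(_mm_mul_ps(m[2], v2), _mm_mul_ps(m[3], v3));
  return _mm_add_ps(a0, a1);  // m[0]*v.x + m[1]*v.y + m[2]*v.z + m[3]*v.w
}
```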
/external/clang/lib/Headers/
xmmintrin.h
31 typedef float __m128 __attribute__((__vector_size__(16))); typedef
60 static __inline__ __m128 __DEFAULT_FN_ATTRS
61 _mm_add_ss(__m128 __a, __m128 __b) in _mm_add_ss()
80 static __inline__ __m128 __DEFAULT_FN_ATTRS
81 _mm_add_ps(__m128 __a, __m128 __b) in _mm_add_ps()
83 return (__m128)((__v4sf)__a + (__v4sf)__b); in _mm_add_ps()
102 static __inline__ __m128 __DEFAULT_FN_ATTRS
103 _mm_sub_ss(__m128 __a, __m128 __b) in _mm_sub_ss()
123 static __inline__ __m128 __DEFAULT_FN_ATTRS
124 _mm_sub_ps(__m128 __a, __m128 __b) in _mm_sub_ps()
[all …]
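
Note: a point the header hits illustrate is that the _ss variants operate on the low lane only and pass the upper three lanes of the first operand through unchanged, while the _ps variants work on all four lanes. A minimal illustration:

```c
#include <xmmintrin.h>

void ss_vs_ps(void) {
  __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);      // lanes: {1, 2, 3, 4}
  __m128 b = _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f);  // lanes: {10, 20, 30, 40}

  __m128 ps = _mm_add_ps(a, b);  // {11, 22, 33, 44}
  __m128 ss = _mm_add_ss(a, b);  // {11, 2, 3, 4}: only lane 0 is summed
  (void)ps; (void)ss;
}
```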
/external/llvm-project/clang/lib/Headers/
xmmintrin.h
17 typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16))); typedef
49 static __inline__ __m128 __DEFAULT_FN_ATTRS
50 _mm_add_ss(__m128 __a, __m128 __b) in _mm_add_ss()
69 static __inline__ __m128 __DEFAULT_FN_ATTRS
70 _mm_add_ps(__m128 __a, __m128 __b) in _mm_add_ps()
72 return (__m128)((__v4sf)__a + (__v4sf)__b); in _mm_add_ps()
91 static __inline__ __m128 __DEFAULT_FN_ATTRS
92 _mm_sub_ss(__m128 __a, __m128 __b) in _mm_sub_ss()
112 static __inline__ __m128 __DEFAULT_FN_ATTRS
113 _mm_sub_ps(__m128 __a, __m128 __b) in _mm_sub_ps()
[all …]
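
Note: the newer header adds __aligned__(16) to the typedef, making explicit that a __m128 object is 16-byte aligned. Aligned loads (_mm_load_ps) require a 16-byte-aligned float pointer, while _mm_loadu_ps accepts any address. A minimal sketch:

```c
#include <xmmintrin.h>

float aligned_buf[4] __attribute__((aligned(16)));  // safe for _mm_load_ps

__m128 load_either(const float *p, int p_is_aligned) {
  return p_is_aligned ? _mm_load_ps(p)    // may fault if p is misaligned
                      : _mm_loadu_ps(p);  // safe for any address
}
```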
/external/XNNPACK/src/f32-dwconv2d-chw/gen/
5x5s2p2-minmax-sse-1x4-acc5.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5()
[all …]
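
Note: every 5x5 kernel in this directory opens the same way: vector clamp/mask parameters are loaded with _mm_load_ps, then the packed weights (bias first, then the 25 taps row by row) are broadcast one scalar at a time with _mm_load1_ps. A condensed sketch of that prologue; the weight layout is inferred from the hits above and the names are illustrative:

```c
#include <xmmintrin.h>

void load_dwconv_weights(const float *weights /* [1 bias + 25 taps] */,
                         __m128 vk[25], __m128 *vbias) {
  *vbias = _mm_load1_ps(weights);           // splat bias to all 4 lanes
  for (int i = 0; i < 25; i++) {
    vk[i] = _mm_load1_ps(weights + 1 + i);  // splat tap k(i/5)(i%5)
  }
}
```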
5x5s2p2-minmax-sse-1x4-acc4.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4()
[all …]
5x5s2p2-minmax-sse-1x4-acc3.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3()
[all …]
5x5s2p2-minmax-sse-1x4-acc2.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2()
[all …]
5x5s2p2-minmax-sse-1x4.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4()
[all …]
5x5p2-minmax-sse-1x4-acc5.c
33 const __m128 vmask = _mm_load_ps((const float*) params->sse.mask); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
34 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
35 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
37 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
38 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
39 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
40 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
41 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
43 const __m128 vk10 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
[all …]
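
Note: the acc2/acc3/acc4/acc5 suffixes on these variants name how many independent accumulators the 25 multiply-adds are split across before a final reduction; more accumulators shorten the addps dependency chain at the cost of registers. A minimal sketch of the idea with two accumulators (variable names illustrative):

```c
#include <xmmintrin.h>

__m128 dot25_acc2(const __m128 vi[25], const __m128 vk[25], __m128 vbias) {
  __m128 acc0 = vbias;                       // chain 0 starts with the bias
  __m128 acc1 = _mm_mul_ps(vi[0], vk[0]);    // chain 1 starts with tap 0
  for (int i = 1; i < 25; i += 2) {
    acc0 = _mm_add_ps(acc0, _mm_mul_ps(vi[i], vk[i]));
    if (i + 1 < 25) acc1 = _mm_add_ps(acc1, _mm_mul_ps(vi[i + 1], vk[i + 1]));
  }
  return _mm_add_ps(acc0, acc1);  // reduce the two chains at the end
}
```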
5x5p2-minmax-sse-1x4-acc4.c
33 const __m128 vmask = _mm_load_ps((const float*) params->sse.mask); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
34 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
35 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
37 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
38 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
39 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
40 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
41 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
43 const __m128 vk10 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
[all …]
5x5p2-minmax-sse-1x4-acc3.c
33 const __m128 vmask = _mm_load_ps((const float*) params->sse.mask); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
34 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
35 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
37 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
38 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
39 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
40 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
41 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
43 const __m128 vk10 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
[all …]
5x5s2p2-minmax-sse-2x4-acc3.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3()
[all …]
5x5p2-minmax-sse-1x4-acc2.c
33 const __m128 vmask = _mm_load_ps((const float*) params->sse.mask); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
34 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
35 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
37 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
38 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
39 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
40 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
41 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
43 const __m128 vk10 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
[all …]
5x5s2p2-minmax-sse-3x4-acc2.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
[all …]
5x5s2p2-minmax-sse-2x4-acc2.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2()
[all …]
5x5p2-minmax-sse-1x4.c
33 const __m128 vmask = _mm_load_ps((const float*) params->sse.mask); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
34 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
35 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
37 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
38 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
39 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
40 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
41 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
42 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
43 const __m128 vk10 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
[all …]
5x5s2p2-minmax-sse-2x4.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4()
[all …]
5x5s2p2-minmax-sse-3x4.c
34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
36 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
37 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
39 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
40 const __m128 vk00 = _mm_load1_ps(weights + 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
41 const __m128 vk01 = _mm_load1_ps(weights + 2); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
42 const __m128 vk02 = _mm_load1_ps(weights + 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
43 const __m128 vk03 = _mm_load1_ps(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
44 const __m128 vk04 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
[all …]
/external/XNNPACK/src/f32-dwconv/gen/
up8x25-minmax-sse-acc2.c
32 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
33 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
165 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
166 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
169 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
170 const __m128 vi0x4567 = _mm_loadu_ps(i0 + 4); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
173 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
174 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
178 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
179 const __m128 vi1x4567 = _mm_loadu_ps(i1 + 4); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
[all …]
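
Note: the up8x25 kernels process 8 channels (two __m128 registers) per step: for each of the 25 taps they load 8 input lanes and 8 packed-weight lanes, multiply-accumulate, then clamp with the preloaded vmin/vmax. A reduced sketch of one tap plus the final clamp; the pointer layout and names are inferred from the hits above, not taken from the source:

```c
#include <xmmintrin.h>

void one_tap_and_clamp(const float *i0, const float *w,
                       __m128 *vacc0123, __m128 *vacc4567,
                       __m128 vmin, __m128 vmax, float *out) {
  const __m128 vi0123 = _mm_loadu_ps(i0);    // inputs may be unaligned
  const __m128 vi4567 = _mm_loadu_ps(i0 + 4);
  const __m128 vk0123 = _mm_load_ps(w + 8);  // packed weights are aligned
  const __m128 vk4567 = _mm_load_ps(w + 12);
  *vacc0123 = _mm_add_ps(*vacc0123, _mm_mul_ps(vi0123, vk0123));
  *vacc4567 = _mm_add_ps(*vacc4567, _mm_mul_ps(vi4567, vk4567));

  // Final clamp to [vmin, vmax], as in the minmax kernels above.
  _mm_storeu_ps(out,     _mm_max_ps(_mm_min_ps(*vacc0123, vmax), vmin));
  _mm_storeu_ps(out + 4, _mm_max_ps(_mm_min_ps(*vacc4567, vmax), vmin));
}
```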
up8x25-minmax-sse.c
32 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
33 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
165 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
166 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
169 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
170 const __m128 vi0x4567 = _mm_loadu_ps(i0 + 4); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
173 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
174 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
178 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
179 const __m128 vi1x4567 = _mm_loadu_ps(i1 + 4); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
[all …]
/external/webrtc/common_audio/third_party/ooura/fft_size_128/
ooura_fft_sse2.cc
27 static __inline __m128 _mm_castsi128_ps(__m128i a) { in _mm_castsi128_ps()
28 return *(__m128*)&a; in _mm_castsi128_ps()
30 static __inline __m128i _mm_castps_si128(__m128 a) { in _mm_castps_si128()
38 const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); in cft1st_128_SSE2()
42 __m128 a00v = _mm_loadu_ps(&a[j + 0]); in cft1st_128_SSE2()
43 __m128 a04v = _mm_loadu_ps(&a[j + 4]); in cft1st_128_SSE2()
44 __m128 a08v = _mm_loadu_ps(&a[j + 8]); in cft1st_128_SSE2()
45 __m128 a12v = _mm_loadu_ps(&a[j + 12]); in cft1st_128_SSE2()
46 __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0)); in cft1st_128_SSE2()
47 __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2)); in cft1st_128_SSE2()
[all …]
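
Note: ooura_fft_sse2.cc supplies its own _mm_castsi128_ps/_mm_castps_si128 shims for old compilers; the casts are pure bit reinterpretations and compile to no instructions. They are typically paired with integer sign masks, as with the mm_swap_sign constant loaded above. A minimal sketch of that XOR-the-sign-bit idiom (not taken from the file):

```c
#include <emmintrin.h>  // SSE2

// Negate the odd lanes of v: XOR flips the sign bit where the mask is set.
static __m128 negate_odd_lanes(__m128 v) {
  const __m128i kSignOdd =
      _mm_set_epi32((int)0x80000000, 0, (int)0x80000000, 0);  // lanes 1 and 3
  return _mm_xor_ps(v, _mm_castsi128_ps(kSignOdd));
}
```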
/external/XNNPACK/src/f32-maxpool/
9p8x-minmax-sse-c4.c
28 const __m128 voutput_max = _mm_load_ps(params->sse.max); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
29 const __m128 voutput_min = _mm_load_ps(params->sse.min); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
78 const __m128 vi0 = _mm_loadu_ps(i0); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
80 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
82 const __m128 vi2 = _mm_loadu_ps(i2); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
84 const __m128 vi3 = _mm_loadu_ps(i3); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
86 const __m128 vi4 = _mm_loadu_ps(i4); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
88 const __m128 vi5 = _mm_loadu_ps(i5); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
90 const __m128 vi6 = _mm_loadu_ps(i6); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
92 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
[all …]
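
Note: the 9p8x maxpool kernel reduces nine input rows with a tree of _mm_max_ps and then clamps the result to the preloaded output_min/output_max. A condensed sketch of the reduction for one group of four channels, with a loop standing in for the unrolled tree:

```c
#include <xmmintrin.h>

__m128 maxpool9(const float *i[9], __m128 vmin, __m128 vmax) {
  __m128 m = _mm_loadu_ps(i[0]);
  for (int k = 1; k < 9; k++) {
    m = _mm_max_ps(m, _mm_loadu_ps(i[k]));  // running max over the 9 rows
  }
  return _mm_min_ps(_mm_max_ps(m, vmin), vmax);  // clamp to [min, max]
}
```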
