/external/llvm-project/clang/test/CodeGen/X86/

sse-builtins.c
    9  __m128 test_mm_add_ps(__m128 A, __m128 B) {
   15  __m128 test_mm_add_ss(__m128 A, __m128 B) {
   24  __m128 test_mm_and_ps(__m128 A, __m128 B) {
   30  __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
   37  __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
   46  __m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
   52  __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
   61  __m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
   68  __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
   77  __m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
   [all …]
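These test files pin each <xmmintrin.h> wrapper to the IR clang is expected to emit. A minimal sketch of the pattern they follow (the RUN line and the exact CHECK text are assumptions; only the wrapper shape comes from the listing):

    #include <xmmintrin.h>

    // Each test wraps exactly one intrinsic so FileCheck can match the
    // instruction it lowers to.
    __m128 test_mm_add_ps(__m128 A, __m128 B) {
      // CHECK-LABEL: test_mm_add_ps
      // CHECK: fadd <4 x float>
      return _mm_add_ps(A, B);
    }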
sse-builtins-constrained-cmp.c
    6  __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
   15  __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
   24  __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
   33  __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
   42  __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
   51  __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
   60  __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
   69  __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
   78  __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
   87  __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
   [all …]
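This variant exercises the same comparison wrappers under strict floating-point exception semantics, where clang must emit constrained intrinsics rather than bare fcmp instructions. A hedged sketch of the pattern (the flag and CHECK text are assumptions):

    // Compiled with something like: -ffp-exception-behavior=strict
    #include <xmmintrin.h>

    __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
      // CHECK: @llvm.experimental.constrained.fcmp
      return _mm_cmpeq_ps(__a, __b);
    }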
/external/clang/test/CodeGen/

sse-builtins.c
   10  __m128 test_mm_add_ps(__m128 A, __m128 B) {
   16  __m128 test_mm_add_ss(__m128 A, __m128 B) {
   25  __m128 test_mm_and_ps(__m128 A, __m128 B) {
   31  __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
   38  __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
   47  __m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
   53  __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
   62  __m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
   69  __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
   78  __m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
   [all …]
/external/oboe/samples/RhythmGame/third_party/glm/simd/

matrix.h  (all matches in glm_mat4_mul_vec4())
   36  __m128 v0 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
   37  __m128 v1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
   38  __m128 v2 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
   39  __m128 v3 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));
   41  __m128 m0 = _mm_mul_ps(m[0], v0);
   42  __m128 m1 = _mm_mul_ps(m[1], v1);
   43  __m128 m2 = _mm_mul_ps(m[2], v2);
   44  __m128 m3 = _mm_mul_ps(m[3], v3);
   46  __m128 a0 = _mm_add_ps(m0, m1);
   47  __m128 a1 = _mm_add_ps(m2, m3);
   [all …]
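The listing cuts off just before the final reduction. A hedged reconstruction of the whole routine: broadcast each component of v with a shuffle, scale the matching column of m, then sum the four partial products (the last add and the return are inferred from the pattern, not shown above):

    #include <xmmintrin.h>

    static inline __m128 mat4_mul_vec4(const __m128 m[4], __m128 v) {
      __m128 v0 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));  // v.x in all lanes
      __m128 v1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));  // v.y
      __m128 v2 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));  // v.z
      __m128 v3 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));  // v.w
      __m128 m0 = _mm_mul_ps(m[0], v0);   // scale column 0 by v.x
      __m128 m1 = _mm_mul_ps(m[1], v1);
      __m128 m2 = _mm_mul_ps(m[2], v2);
      __m128 m3 = _mm_mul_ps(m[3], v3);
      __m128 a0 = _mm_add_ps(m0, m1);
      __m128 a1 = _mm_add_ps(m2, m3);
      return _mm_add_ps(a0, a1);  // assumed final fold; elided by the listing
    }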
/external/clang/lib/Headers/

xmmintrin.h
   31  typedef float __m128 __attribute__((__vector_size__(16)));
   60  static __inline__ __m128 __DEFAULT_FN_ATTRS
   61  _mm_add_ss(__m128 __a, __m128 __b)
   80  static __inline__ __m128 __DEFAULT_FN_ATTRS
   81  _mm_add_ps(__m128 __a, __m128 __b)
   83  return (__m128)((__v4sf)__a + (__v4sf)__b);    // in _mm_add_ps()
  102  static __inline__ __m128 __DEFAULT_FN_ATTRS
  103  _mm_sub_ss(__m128 __a, __m128 __b)
  123  static __inline__ __m128 __DEFAULT_FN_ATTRS
  124  _mm_sub_ps(__m128 __a, __m128 __b)
   [all …]
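The packed forms are implemented as plain vector arithmetic on __v4sf, while the _ss forms touch only lane 0 and pass lanes 1-3 of the first operand through unchanged. A small usage sketch (the driver is hypothetical, not part of the header):

    #include <stdio.h>
    #include <xmmintrin.h>

    int main(void) {
      __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);      // lanes {1, 2, 3, 4}
      __m128 b = _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f);  // lanes {10, 20, 30, 40}
      float out[4];
      _mm_storeu_ps(out, _mm_add_ss(a, b));               // scalar add, lane 0 only
      printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 11 2 3 4
      _mm_storeu_ps(out, _mm_add_ps(a, b));               // packed add, all lanes
      printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 11 22 33 44
      return 0;
    }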
/external/llvm-project/clang/lib/Headers/

xmmintrin.h
   17  typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
   49  static __inline__ __m128 __DEFAULT_FN_ATTRS
   50  _mm_add_ss(__m128 __a, __m128 __b)
   69  static __inline__ __m128 __DEFAULT_FN_ATTRS
   70  _mm_add_ps(__m128 __a, __m128 __b)
   72  return (__m128)((__v4sf)__a + (__v4sf)__b);    // in _mm_add_ps()
   91  static __inline__ __m128 __DEFAULT_FN_ATTRS
   92  _mm_sub_ss(__m128 __a, __m128 __b)
  112  static __inline__ __m128 __DEFAULT_FN_ATTRS
  113  _mm_sub_ps(__m128 __a, __m128 __b)
   [all …]
/external/XNNPACK/src/f32-dwconv2d-chw/gen/

5x5s2p2-minmax-sse-1x4-acc5.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
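All of these generated kernels open the same way: clamp bounds and boundary masks arrive via aligned vector loads, and the bias plus each of the 25 filter taps is broadcast to all four lanes with _mm_load1_ps. A hedged sketch of one tap plus the minmax epilogue (the helper and its shape are illustrative, not the generated code):

    #include <xmmintrin.h>

    static __m128 one_tap_clamped(const float* weights, const float* input,
                                  __m128 vmin, __m128 vmax) {
      const __m128 vbias = _mm_load1_ps(weights);      // weights[0] in all 4 lanes
      const __m128 vk00  = _mm_load1_ps(weights + 1);  // first 5x5 tap, broadcast
      const __m128 vi    = _mm_loadu_ps(input);        // four neighboring pixels
      __m128 vacc = _mm_add_ps(vbias, _mm_mul_ps(vi, vk00));
      return _mm_max_ps(_mm_min_ps(vacc, vmax), vmin); // clamp into [vmin, vmax]
    }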
5x5s2p2-minmax-sse-1x4-acc4.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
5x5s2p2-minmax-sse-1x4-acc3.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
5x5s2p2-minmax-sse-1x4-acc2.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
5x5s2p2-minmax-sse-1x4.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
5x5p2-minmax-sse-1x4-acc5.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5())
   33  const __m128 vmask = _mm_load_ps((const float*) params->sse.mask);
   34  const __m128 vmax = _mm_load_ps(params->sse.max);
   35  const __m128 vmin = _mm_load_ps(params->sse.min);
   37  const __m128 vbias = _mm_load1_ps(weights);
   38  const __m128 vk00 = _mm_load1_ps(weights + 1);
   39  const __m128 vk01 = _mm_load1_ps(weights + 2);
   40  const __m128 vk02 = _mm_load1_ps(weights + 3);
   41  const __m128 vk03 = _mm_load1_ps(weights + 4);
   42  const __m128 vk04 = _mm_load1_ps(weights + 5);
   43  const __m128 vk10 = _mm_load1_ps(weights + 6);
   [all …]
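The accN suffix counts independent accumulators: spreading the 25 partial products across several registers shortens the floating-point add dependency chain, at the cost of one fold at the end. A hedged two-accumulator sketch (the real kernels use up to five):

    #include <xmmintrin.h>

    static __m128 mac4_acc2(const __m128 vi[4], const __m128 vk[4]) {
      __m128 vacc0 = _mm_mul_ps(vi[0], vk[0]);              // chain 0
      __m128 vacc1 = _mm_mul_ps(vi[1], vk[1]);              // chain 1, independent
      vacc0 = _mm_add_ps(vacc0, _mm_mul_ps(vi[2], vk[2]));
      vacc1 = _mm_add_ps(vacc1, _mm_mul_ps(vi[3], vk[3]));
      return _mm_add_ps(vacc0, vacc1);  // fold the chains once at the end
    }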
5x5p2-minmax-sse-1x4-acc4.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4())
   33  const __m128 vmask = _mm_load_ps((const float*) params->sse.mask);
   34  const __m128 vmax = _mm_load_ps(params->sse.max);
   35  const __m128 vmin = _mm_load_ps(params->sse.min);
   37  const __m128 vbias = _mm_load1_ps(weights);
   38  const __m128 vk00 = _mm_load1_ps(weights + 1);
   39  const __m128 vk01 = _mm_load1_ps(weights + 2);
   40  const __m128 vk02 = _mm_load1_ps(weights + 3);
   41  const __m128 vk03 = _mm_load1_ps(weights + 4);
   42  const __m128 vk04 = _mm_load1_ps(weights + 5);
   43  const __m128 vk10 = _mm_load1_ps(weights + 6);
   [all …]
5x5p2-minmax-sse-1x4-acc3.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3())
   33  const __m128 vmask = _mm_load_ps((const float*) params->sse.mask);
   34  const __m128 vmax = _mm_load_ps(params->sse.max);
   35  const __m128 vmin = _mm_load_ps(params->sse.min);
   37  const __m128 vbias = _mm_load1_ps(weights);
   38  const __m128 vk00 = _mm_load1_ps(weights + 1);
   39  const __m128 vk01 = _mm_load1_ps(weights + 2);
   40  const __m128 vk02 = _mm_load1_ps(weights + 3);
   41  const __m128 vk03 = _mm_load1_ps(weights + 4);
   42  const __m128 vk04 = _mm_load1_ps(weights + 5);
   43  const __m128 vk10 = _mm_load1_ps(weights + 6);
   [all …]
5x5s2p2-minmax-sse-2x4-acc3.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
5x5p2-minmax-sse-1x4-acc2.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2())
   33  const __m128 vmask = _mm_load_ps((const float*) params->sse.mask);
   34  const __m128 vmax = _mm_load_ps(params->sse.max);
   35  const __m128 vmin = _mm_load_ps(params->sse.min);
   37  const __m128 vbias = _mm_load1_ps(weights);
   38  const __m128 vk00 = _mm_load1_ps(weights + 1);
   39  const __m128 vk01 = _mm_load1_ps(weights + 2);
   40  const __m128 vk02 = _mm_load1_ps(weights + 3);
   41  const __m128 vk03 = _mm_load1_ps(weights + 4);
   42  const __m128 vk04 = _mm_load1_ps(weights + 5);
   43  const __m128 vk10 = _mm_load1_ps(weights + 6);
   [all …]
5x5s2p2-minmax-sse-3x4-acc2.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
5x5s2p2-minmax-sse-2x4-acc2.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc2())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
5x5p2-minmax-sse-1x4.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4())
   33  const __m128 vmask = _mm_load_ps((const float*) params->sse.mask);
   34  const __m128 vmax = _mm_load_ps(params->sse.max);
   35  const __m128 vmin = _mm_load_ps(params->sse.min);
   37  const __m128 vbias = _mm_load1_ps(weights);
   38  const __m128 vk00 = _mm_load1_ps(weights + 1);
   39  const __m128 vk01 = _mm_load1_ps(weights + 2);
   40  const __m128 vk02 = _mm_load1_ps(weights + 3);
   41  const __m128 vk03 = _mm_load1_ps(weights + 4);
   42  const __m128 vk04 = _mm_load1_ps(weights + 5);
   43  const __m128 vk10 = _mm_load1_ps(weights + 6);
   [all …]
5x5s2p2-minmax-sse-2x4.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
5x5s2p2-minmax-sse-3x4.c  (all matches in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4())
   34  const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even);
   35  const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd);
   36  const __m128 vmax = _mm_load_ps(params->sse.max);
   37  const __m128 vmin = _mm_load_ps(params->sse.min);
   39  const __m128 vbias = _mm_load1_ps(weights);
   40  const __m128 vk00 = _mm_load1_ps(weights + 1);
   41  const __m128 vk01 = _mm_load1_ps(weights + 2);
   42  const __m128 vk02 = _mm_load1_ps(weights + 3);
   43  const __m128 vk03 = _mm_load1_ps(weights + 4);
   44  const __m128 vk04 = _mm_load1_ps(weights + 5);
   [all …]
/external/XNNPACK/src/f32-dwconv/gen/

up8x25-minmax-sse-acc2.c  (all matches in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2())
   32  const __m128 vmax = _mm_load_ps(params->sse.max);
   33  const __m128 vmin = _mm_load_ps(params->sse.min);
  165  __m128 vacc0123p0 = _mm_load_ps(w);
  166  __m128 vacc4567p0 = _mm_load_ps(w + 4);
  169  const __m128 vi0x0123 = _mm_loadu_ps(i0);
  170  const __m128 vi0x4567 = _mm_loadu_ps(i0 + 4);
  173  const __m128 vk0x0123 = _mm_load_ps(w + 8);
  174  const __m128 vk0x4567 = _mm_load_ps(w + 12);
  178  const __m128 vi1x0123 = _mm_loadu_ps(i1);
  179  const __m128 vi1x4567 = _mm_loadu_ps(i1 + 4);
   [all …]
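Per channel group the kernel seeds its accumulators from the packed weight block (w, w + 4), then for each of the 25 taps pairs unaligned activation loads with aligned weight loads. A hedged sketch of one tap over eight channels (the helper is hypothetical; names mirror the listing):

    #include <xmmintrin.h>

    static void dwconv_tap_c8(__m128* vacc0123, __m128* vacc4567,
                              const float* i0, const float* w) {
      const __m128 vi0x0123 = _mm_loadu_ps(i0);       // activations: unaligned
      const __m128 vi0x4567 = _mm_loadu_ps(i0 + 4);
      const __m128 vk0x0123 = _mm_load_ps(w + 8);     // packed weights: aligned
      const __m128 vk0x4567 = _mm_load_ps(w + 12);
      *vacc0123 = _mm_add_ps(*vacc0123, _mm_mul_ps(vi0x0123, vk0x0123));
      *vacc4567 = _mm_add_ps(*vacc4567, _mm_mul_ps(vi0x4567, vk0x4567));
    }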
up8x25-minmax-sse.c  (all matches in xnn_f32_dwconv_minmax_ukernel_up8x25__sse())
   32  const __m128 vmax = _mm_load_ps(params->sse.max);
   33  const __m128 vmin = _mm_load_ps(params->sse.min);
  165  __m128 vacc0123p0 = _mm_load_ps(w);
  166  __m128 vacc4567p0 = _mm_load_ps(w + 4);
  169  const __m128 vi0x0123 = _mm_loadu_ps(i0);
  170  const __m128 vi0x4567 = _mm_loadu_ps(i0 + 4);
  173  const __m128 vk0x0123 = _mm_load_ps(w + 8);
  174  const __m128 vk0x4567 = _mm_load_ps(w + 12);
  178  const __m128 vi1x0123 = _mm_loadu_ps(i1);
  179  const __m128 vi1x4567 = _mm_loadu_ps(i1 + 4);
   [all …]
/external/webrtc/common_audio/third_party/ooura/fft_size_128/

ooura_fft_sse2.cc
   27  static __inline __m128 _mm_castsi128_ps(__m128i a) {
   28    return *(__m128*)&a;                    // in _mm_castsi128_ps()
   30  static __inline __m128i _mm_castps_si128(__m128 a) {
  (remaining matches in cft1st_128_SSE2())
   38  const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
   42  __m128 a00v = _mm_loadu_ps(&a[j + 0]);
   43  __m128 a04v = _mm_loadu_ps(&a[j + 4]);
   44  __m128 a08v = _mm_loadu_ps(&a[j + 8]);
   45  __m128 a12v = _mm_loadu_ps(&a[j + 12]);
   46  __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0));
   47  __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2));
   [all …]
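The two shims at the top reimplement the SSE2 bit-cast intrinsics with a pointer pun, evidently for toolchains whose <emmintrin.h> lacks them; the casts reinterpret the 128-bit register without converting or moving data. A usage sketch in the spirit of mm_swap_sign (the helper and mask values are illustrative):

    #include <emmintrin.h>

    // Flip the sign bit of lanes 1 and 3 by building the mask in the integer
    // domain and bit-casting it to float lanes; the cast emits no instructions.
    static inline __m128 flip_odd_signs(__m128 x) {
      const __m128i mask = _mm_set_epi32((int)0x80000000, 0, (int)0x80000000, 0);
      return _mm_xor_ps(x, _mm_castsi128_ps(mask));
    }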
/external/XNNPACK/src/f32-maxpool/

9p8x-minmax-sse-c4.c  (all matches in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4())
   28  const __m128 voutput_max = _mm_load_ps(params->sse.max);
   29  const __m128 voutput_min = _mm_load_ps(params->sse.min);
   78  const __m128 vi0 = _mm_loadu_ps(i0);
   80  const __m128 vi1 = _mm_loadu_ps(i1);
   82  const __m128 vi2 = _mm_loadu_ps(i2);
   84  const __m128 vi3 = _mm_loadu_ps(i3);
   86  const __m128 vi4 = _mm_loadu_ps(i4);
   88  const __m128 vi5 = _mm_loadu_ps(i5);
   90  const __m128 vi6 = _mm_loadu_ps(i6);
   92  const __m128 vi7 = _mm_loadu_ps(i7);
   [all …]
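The 9p8x kernel reduces up to nine input rows with pairwise _mm_max_ps, then clamps the result into the loaded [voutput_min, voutput_max] range. A hedged sketch of one four-channel column (the helper is illustrative, not the kernel itself):

    #include <xmmintrin.h>

    static __m128 maxpool9_c4(const float* const i[9], __m128 vmin, __m128 vmax) {
      __m128 m = _mm_loadu_ps(i[0]);
      for (int k = 1; k < 9; k++) {
        m = _mm_max_ps(m, _mm_loadu_ps(i[k]));    // pairwise max over the window
      }
      return _mm_max_ps(_mm_min_ps(m, vmax), vmin);  // clamp into [vmin, vmax]
    }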