Lines Matching refs:__a

43 _mm_add_ss(__m128 __a, __m128 __b)  in _mm_add_ss()  argument
45 __a[0] += __b[0]; in _mm_add_ss()
46 return __a; in _mm_add_ss()
50 _mm_add_ps(__m128 __a, __m128 __b) in _mm_add_ps() argument
52 return __a + __b; in _mm_add_ps()
56 _mm_sub_ss(__m128 __a, __m128 __b) in _mm_sub_ss() argument
58 __a[0] -= __b[0]; in _mm_sub_ss()
59 return __a; in _mm_sub_ss()
63 _mm_sub_ps(__m128 __a, __m128 __b) in _mm_sub_ps() argument
65 return __a - __b; in _mm_sub_ps()
69 _mm_mul_ss(__m128 __a, __m128 __b) in _mm_mul_ss() argument
71 __a[0] *= __b[0]; in _mm_mul_ss()
72 return __a; in _mm_mul_ss()
76 _mm_mul_ps(__m128 __a, __m128 __b) in _mm_mul_ps() argument
78 return __a * __b; in _mm_mul_ps()
82 _mm_div_ss(__m128 __a, __m128 __b) in _mm_div_ss() argument
84 __a[0] /= __b[0]; in _mm_div_ss()
85 return __a; in _mm_div_ss()
89 _mm_div_ps(__m128 __a, __m128 __b) in _mm_div_ps() argument
91 return __a / __b; in _mm_div_ps()
95 _mm_sqrt_ss(__m128 __a) in _mm_sqrt_ss() argument
97 __m128 __c = __builtin_ia32_sqrtss(__a); in _mm_sqrt_ss()
98 return (__m128) { __c[0], __a[1], __a[2], __a[3] }; in _mm_sqrt_ss()
102 _mm_sqrt_ps(__m128 __a) in _mm_sqrt_ps() argument
104 return __builtin_ia32_sqrtps(__a); in _mm_sqrt_ps()
108 _mm_rcp_ss(__m128 __a) in _mm_rcp_ss() argument
110 __m128 __c = __builtin_ia32_rcpss(__a); in _mm_rcp_ss()
111 return (__m128) { __c[0], __a[1], __a[2], __a[3] }; in _mm_rcp_ss()
115 _mm_rcp_ps(__m128 __a) in _mm_rcp_ps() argument
117 return __builtin_ia32_rcpps(__a); in _mm_rcp_ps()
121 _mm_rsqrt_ss(__m128 __a) in _mm_rsqrt_ss() argument
123 __m128 __c = __builtin_ia32_rsqrtss(__a); in _mm_rsqrt_ss()
124 return (__m128) { __c[0], __a[1], __a[2], __a[3] }; in _mm_rsqrt_ss()
128 _mm_rsqrt_ps(__m128 __a) in _mm_rsqrt_ps() argument
130 return __builtin_ia32_rsqrtps(__a); in _mm_rsqrt_ps()
134 _mm_min_ss(__m128 __a, __m128 __b) in _mm_min_ss() argument
136 return __builtin_ia32_minss(__a, __b); in _mm_min_ss()
140 _mm_min_ps(__m128 __a, __m128 __b) in _mm_min_ps() argument
142 return __builtin_ia32_minps(__a, __b); in _mm_min_ps()
146 _mm_max_ss(__m128 __a, __m128 __b) in _mm_max_ss() argument
148 return __builtin_ia32_maxss(__a, __b); in _mm_max_ss()
152 _mm_max_ps(__m128 __a, __m128 __b) in _mm_max_ps() argument
154 return __builtin_ia32_maxps(__a, __b); in _mm_max_ps()
158 _mm_and_ps(__m128 __a, __m128 __b) in _mm_and_ps() argument
160 return (__m128)((__v4si)__a & (__v4si)__b); in _mm_and_ps()
164 _mm_andnot_ps(__m128 __a, __m128 __b) in _mm_andnot_ps() argument
166 return (__m128)(~(__v4si)__a & (__v4si)__b); in _mm_andnot_ps()
170 _mm_or_ps(__m128 __a, __m128 __b) in _mm_or_ps() argument
172 return (__m128)((__v4si)__a | (__v4si)__b); in _mm_or_ps()
176 _mm_xor_ps(__m128 __a, __m128 __b) in _mm_xor_ps() argument
178 return (__m128)((__v4si)__a ^ (__v4si)__b); in _mm_xor_ps()
182 _mm_cmpeq_ss(__m128 __a, __m128 __b) in _mm_cmpeq_ss() argument
184 return (__m128)__builtin_ia32_cmpeqss(__a, __b); in _mm_cmpeq_ss()
188 _mm_cmpeq_ps(__m128 __a, __m128 __b) in _mm_cmpeq_ps() argument
190 return (__m128)__builtin_ia32_cmpeqps(__a, __b); in _mm_cmpeq_ps()
194 _mm_cmplt_ss(__m128 __a, __m128 __b) in _mm_cmplt_ss() argument
196 return (__m128)__builtin_ia32_cmpltss(__a, __b); in _mm_cmplt_ss()
200 _mm_cmplt_ps(__m128 __a, __m128 __b) in _mm_cmplt_ps() argument
202 return (__m128)__builtin_ia32_cmpltps(__a, __b); in _mm_cmplt_ps()
206 _mm_cmple_ss(__m128 __a, __m128 __b) in _mm_cmple_ss() argument
208 return (__m128)__builtin_ia32_cmpless(__a, __b); in _mm_cmple_ss()
212 _mm_cmple_ps(__m128 __a, __m128 __b) in _mm_cmple_ps() argument
214 return (__m128)__builtin_ia32_cmpleps(__a, __b); in _mm_cmple_ps()
218 _mm_cmpgt_ss(__m128 __a, __m128 __b) in _mm_cmpgt_ss() argument
220 return (__m128)__builtin_shufflevector(__a, in _mm_cmpgt_ss()
221 __builtin_ia32_cmpltss(__b, __a), in _mm_cmpgt_ss()
226 _mm_cmpgt_ps(__m128 __a, __m128 __b) in _mm_cmpgt_ps() argument
228 return (__m128)__builtin_ia32_cmpltps(__b, __a); in _mm_cmpgt_ps()
232 _mm_cmpge_ss(__m128 __a, __m128 __b) in _mm_cmpge_ss() argument
234 return (__m128)__builtin_shufflevector(__a, in _mm_cmpge_ss()
235 __builtin_ia32_cmpless(__b, __a), in _mm_cmpge_ss()
240 _mm_cmpge_ps(__m128 __a, __m128 __b) in _mm_cmpge_ps() argument
242 return (__m128)__builtin_ia32_cmpleps(__b, __a); in _mm_cmpge_ps()
246 _mm_cmpneq_ss(__m128 __a, __m128 __b) in _mm_cmpneq_ss() argument
248 return (__m128)__builtin_ia32_cmpneqss(__a, __b); in _mm_cmpneq_ss()
252 _mm_cmpneq_ps(__m128 __a, __m128 __b) in _mm_cmpneq_ps() argument
254 return (__m128)__builtin_ia32_cmpneqps(__a, __b); in _mm_cmpneq_ps()
258 _mm_cmpnlt_ss(__m128 __a, __m128 __b) in _mm_cmpnlt_ss() argument
260 return (__m128)__builtin_ia32_cmpnltss(__a, __b); in _mm_cmpnlt_ss()
264 _mm_cmpnlt_ps(__m128 __a, __m128 __b) in _mm_cmpnlt_ps() argument
266 return (__m128)__builtin_ia32_cmpnltps(__a, __b); in _mm_cmpnlt_ps()
270 _mm_cmpnle_ss(__m128 __a, __m128 __b) in _mm_cmpnle_ss() argument
272 return (__m128)__builtin_ia32_cmpnless(__a, __b); in _mm_cmpnle_ss()
276 _mm_cmpnle_ps(__m128 __a, __m128 __b) in _mm_cmpnle_ps() argument
278 return (__m128)__builtin_ia32_cmpnleps(__a, __b); in _mm_cmpnle_ps()
282 _mm_cmpngt_ss(__m128 __a, __m128 __b) in _mm_cmpngt_ss() argument
284 return (__m128)__builtin_shufflevector(__a, in _mm_cmpngt_ss()
285 __builtin_ia32_cmpnltss(__b, __a), in _mm_cmpngt_ss()
290 _mm_cmpngt_ps(__m128 __a, __m128 __b) in _mm_cmpngt_ps() argument
292 return (__m128)__builtin_ia32_cmpnltps(__b, __a); in _mm_cmpngt_ps()
296 _mm_cmpnge_ss(__m128 __a, __m128 __b) in _mm_cmpnge_ss() argument
298 return (__m128)__builtin_shufflevector(__a, in _mm_cmpnge_ss()
299 __builtin_ia32_cmpnless(__b, __a), in _mm_cmpnge_ss()
304 _mm_cmpnge_ps(__m128 __a, __m128 __b) in _mm_cmpnge_ps() argument
306 return (__m128)__builtin_ia32_cmpnleps(__b, __a); in _mm_cmpnge_ps()
310 _mm_cmpord_ss(__m128 __a, __m128 __b) in _mm_cmpord_ss() argument
312 return (__m128)__builtin_ia32_cmpordss(__a, __b); in _mm_cmpord_ss()
316 _mm_cmpord_ps(__m128 __a, __m128 __b) in _mm_cmpord_ps() argument
318 return (__m128)__builtin_ia32_cmpordps(__a, __b); in _mm_cmpord_ps()
322 _mm_cmpunord_ss(__m128 __a, __m128 __b) in _mm_cmpunord_ss() argument
324 return (__m128)__builtin_ia32_cmpunordss(__a, __b); in _mm_cmpunord_ss()
328 _mm_cmpunord_ps(__m128 __a, __m128 __b) in _mm_cmpunord_ps() argument
330 return (__m128)__builtin_ia32_cmpunordps(__a, __b); in _mm_cmpunord_ps()
334 _mm_comieq_ss(__m128 __a, __m128 __b) in _mm_comieq_ss() argument
336 return __builtin_ia32_comieq(__a, __b); in _mm_comieq_ss()
340 _mm_comilt_ss(__m128 __a, __m128 __b) in _mm_comilt_ss() argument
342 return __builtin_ia32_comilt(__a, __b); in _mm_comilt_ss()
346 _mm_comile_ss(__m128 __a, __m128 __b) in _mm_comile_ss() argument
348 return __builtin_ia32_comile(__a, __b); in _mm_comile_ss()
352 _mm_comigt_ss(__m128 __a, __m128 __b) in _mm_comigt_ss() argument
354 return __builtin_ia32_comigt(__a, __b); in _mm_comigt_ss()
358 _mm_comige_ss(__m128 __a, __m128 __b) in _mm_comige_ss() argument
360 return __builtin_ia32_comige(__a, __b); in _mm_comige_ss()
364 _mm_comineq_ss(__m128 __a, __m128 __b) in _mm_comineq_ss() argument
366 return __builtin_ia32_comineq(__a, __b); in _mm_comineq_ss()
370 _mm_ucomieq_ss(__m128 __a, __m128 __b) in _mm_ucomieq_ss() argument
372 return __builtin_ia32_ucomieq(__a, __b); in _mm_ucomieq_ss()
376 _mm_ucomilt_ss(__m128 __a, __m128 __b) in _mm_ucomilt_ss() argument
378 return __builtin_ia32_ucomilt(__a, __b); in _mm_ucomilt_ss()
382 _mm_ucomile_ss(__m128 __a, __m128 __b) in _mm_ucomile_ss() argument
384 return __builtin_ia32_ucomile(__a, __b); in _mm_ucomile_ss()
388 _mm_ucomigt_ss(__m128 __a, __m128 __b) in _mm_ucomigt_ss() argument
390 return __builtin_ia32_ucomigt(__a, __b); in _mm_ucomigt_ss()
394 _mm_ucomige_ss(__m128 __a, __m128 __b) in _mm_ucomige_ss() argument
396 return __builtin_ia32_ucomige(__a, __b); in _mm_ucomige_ss()
400 _mm_ucomineq_ss(__m128 __a, __m128 __b) in _mm_ucomineq_ss() argument
402 return __builtin_ia32_ucomineq(__a, __b); in _mm_ucomineq_ss()
406 _mm_cvtss_si32(__m128 __a) in _mm_cvtss_si32() argument
408 return __builtin_ia32_cvtss2si(__a); in _mm_cvtss_si32()
412 _mm_cvt_ss2si(__m128 __a) in _mm_cvt_ss2si() argument
414 return _mm_cvtss_si32(__a); in _mm_cvt_ss2si()
420 _mm_cvtss_si64(__m128 __a) in _mm_cvtss_si64() argument
422 return __builtin_ia32_cvtss2si64(__a); in _mm_cvtss_si64()
428 _mm_cvtps_pi32(__m128 __a) in _mm_cvtps_pi32() argument
430 return (__m64)__builtin_ia32_cvtps2pi(__a); in _mm_cvtps_pi32()
434 _mm_cvt_ps2pi(__m128 __a) in _mm_cvt_ps2pi() argument
436 return _mm_cvtps_pi32(__a); in _mm_cvt_ps2pi()
440 _mm_cvttss_si32(__m128 __a) in _mm_cvttss_si32() argument
442 return __a[0]; in _mm_cvttss_si32()
446 _mm_cvtt_ss2si(__m128 __a) in _mm_cvtt_ss2si() argument
448 return _mm_cvttss_si32(__a); in _mm_cvtt_ss2si()
452 _mm_cvttss_si64(__m128 __a) in _mm_cvttss_si64() argument
454 return __a[0]; in _mm_cvttss_si64()
458 _mm_cvttps_pi32(__m128 __a) in _mm_cvttps_pi32() argument
460 return (__m64)__builtin_ia32_cvttps2pi(__a); in _mm_cvttps_pi32()
464 _mm_cvtt_ps2pi(__m128 __a) in _mm_cvtt_ps2pi() argument
466 return _mm_cvttps_pi32(__a); in _mm_cvtt_ps2pi()
470 _mm_cvtsi32_ss(__m128 __a, int __b) in _mm_cvtsi32_ss() argument
472 __a[0] = __b; in _mm_cvtsi32_ss()
473 return __a; in _mm_cvtsi32_ss()
477 _mm_cvt_si2ss(__m128 __a, int __b) in _mm_cvt_si2ss() argument
479 return _mm_cvtsi32_ss(__a, __b); in _mm_cvt_si2ss()
485 _mm_cvtsi64_ss(__m128 __a, long long __b) in _mm_cvtsi64_ss() argument
487 __a[0] = __b; in _mm_cvtsi64_ss()
488 return __a; in _mm_cvtsi64_ss()
494 _mm_cvtpi32_ps(__m128 __a, __m64 __b) in _mm_cvtpi32_ps() argument
496 return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b); in _mm_cvtpi32_ps()
500 _mm_cvt_pi2ps(__m128 __a, __m64 __b) in _mm_cvt_pi2ps() argument
502 return _mm_cvtpi32_ps(__a, __b); in _mm_cvt_pi2ps()
506 _mm_cvtss_f32(__m128 __a) in _mm_cvtss_f32() argument
508 return __a[0]; in _mm_cvtss_f32()
512 _mm_loadh_pi(__m128 __a, const __m64 *__p) in _mm_loadh_pi() argument
520 return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); in _mm_loadh_pi()
524 _mm_loadl_pi(__m128 __a, const __m64 *__p) in _mm_loadl_pi() argument
532 return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); in _mm_loadl_pi()
575 __m128 __a = _mm_load_ps(__p); in _mm_loadr_ps() local
576 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); in _mm_loadr_ps()
623 _mm_storeh_pi(__m64 *__p, __m128 __a) in _mm_storeh_pi() argument
625 __builtin_ia32_storehps((__v2si *)__p, __a); in _mm_storeh_pi()
629 _mm_storel_pi(__m64 *__p, __m128 __a) in _mm_storel_pi() argument
631 __builtin_ia32_storelps((__v2si *)__p, __a); in _mm_storel_pi()
635 _mm_store_ss(float *__p, __m128 __a) in _mm_store_ss() argument
640 ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; in _mm_store_ss()
644 _mm_storeu_ps(float *__p, __m128 __a) in _mm_storeu_ps() argument
646 __builtin_ia32_storeups(__p, __a); in _mm_storeu_ps()
650 _mm_store1_ps(float *__p, __m128 __a) in _mm_store1_ps() argument
652 __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0); in _mm_store1_ps()
653 _mm_storeu_ps(__p, __a); in _mm_store1_ps()
657 _mm_store_ps1(float *__p, __m128 __a) in _mm_store_ps1() argument
659 return _mm_store1_ps(__p, __a); in _mm_store_ps1()
663 _mm_store_ps(float *__p, __m128 __a) in _mm_store_ps() argument
665 *(__m128 *)__p = __a; in _mm_store_ps()
669 _mm_storer_ps(float *__p, __m128 __a) in _mm_storer_ps() argument
671 __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0); in _mm_storer_ps()
672 _mm_store_ps(__p, __a); in _mm_storer_ps()
688 _mm_stream_pi(__m64 *__p, __m64 __a) in _mm_stream_pi() argument
690 __builtin_ia32_movntq(__p, __a); in _mm_stream_pi()
694 _mm_stream_ps(float *__p, __m128 __a) in _mm_stream_ps() argument
696 __builtin_ia32_movntps(__p, __a); in _mm_stream_ps()
706 _mm_extract_pi16(__m64 __a, int __n) in _mm_extract_pi16() argument
708 __v4hi __b = (__v4hi)__a; in _mm_extract_pi16()
713 _mm_insert_pi16(__m64 __a, int __d, int __n) in _mm_insert_pi16() argument
715 __v4hi __b = (__v4hi)__a; in _mm_insert_pi16()
721 _mm_max_pi16(__m64 __a, __m64 __b) in _mm_max_pi16() argument
723 return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); in _mm_max_pi16()
727 _mm_max_pu8(__m64 __a, __m64 __b) in _mm_max_pu8() argument
729 return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); in _mm_max_pu8()
733 _mm_min_pi16(__m64 __a, __m64 __b) in _mm_min_pi16() argument
735 return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); in _mm_min_pi16()
739 _mm_min_pu8(__m64 __a, __m64 __b) in _mm_min_pu8() argument
741 return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); in _mm_min_pu8()
745 _mm_movemask_pi8(__m64 __a) in _mm_movemask_pi8() argument
747 return __builtin_ia32_pmovmskb((__v8qi)__a); in _mm_movemask_pi8()
751 _mm_mulhi_pu16(__m64 __a, __m64 __b) in _mm_mulhi_pu16() argument
753 return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); in _mm_mulhi_pu16()
766 _mm_avg_pu8(__m64 __a, __m64 __b) in _mm_avg_pu8() argument
768 return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); in _mm_avg_pu8()
772 _mm_avg_pu16(__m64 __a, __m64 __b) in _mm_avg_pu16() argument
774 return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); in _mm_avg_pu16()
778 _mm_sad_pu8(__m64 __a, __m64 __b) in _mm_sad_pu8() argument
780 return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); in _mm_sad_pu8()
802 _mm_unpackhi_ps(__m128 __a, __m128 __b) in _mm_unpackhi_ps() argument
804 return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); in _mm_unpackhi_ps()
808 _mm_unpacklo_ps(__m128 __a, __m128 __b) in _mm_unpacklo_ps() argument
810 return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); in _mm_unpacklo_ps()
814 _mm_move_ss(__m128 __a, __m128 __b) in _mm_move_ss() argument
816 return __builtin_shufflevector(__a, __b, 4, 1, 2, 3); in _mm_move_ss()
820 _mm_movehl_ps(__m128 __a, __m128 __b) in _mm_movehl_ps() argument
822 return __builtin_shufflevector(__a, __b, 6, 7, 2, 3); in _mm_movehl_ps()
826 _mm_movelh_ps(__m128 __a, __m128 __b) in _mm_movelh_ps() argument
828 return __builtin_shufflevector(__a, __b, 0, 1, 4, 5); in _mm_movelh_ps()
832 _mm_cvtpi16_ps(__m64 __a) in _mm_cvtpi16_ps() argument
838 __b = _mm_cmpgt_pi16(__b, __a); in _mm_cvtpi16_ps()
839 __c = _mm_unpackhi_pi16(__a, __b); in _mm_cvtpi16_ps()
843 __c = _mm_unpacklo_pi16(__a, __b); in _mm_cvtpi16_ps()
850 _mm_cvtpu16_ps(__m64 __a) in _mm_cvtpu16_ps() argument
856 __c = _mm_unpackhi_pi16(__a, __b); in _mm_cvtpu16_ps()
860 __c = _mm_unpacklo_pi16(__a, __b); in _mm_cvtpu16_ps()
867 _mm_cvtpi8_ps(__m64 __a) in _mm_cvtpi8_ps() argument
872 __b = _mm_cmpgt_pi8(__b, __a); in _mm_cvtpi8_ps()
873 __b = _mm_unpacklo_pi8(__a, __b); in _mm_cvtpi8_ps()
879 _mm_cvtpu8_ps(__m64 __a) in _mm_cvtpu8_ps() argument
884 __b = _mm_unpacklo_pi8(__a, __b); in _mm_cvtpu8_ps()
890 _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) in _mm_cvtpi32x2_ps() argument
898 return _mm_cvtpi32_ps(__c, __a); in _mm_cvtpi32x2_ps()
902 _mm_cvtps_pi16(__m128 __a) in _mm_cvtps_pi16() argument
906 __b = _mm_cvtps_pi32(__a); in _mm_cvtps_pi16()
907 __a = _mm_movehl_ps(__a, __a); in _mm_cvtps_pi16()
908 __c = _mm_cvtps_pi32(__a); in _mm_cvtps_pi16()
914 _mm_cvtps_pi8(__m128 __a) in _mm_cvtps_pi8() argument
918 __b = _mm_cvtps_pi16(__a); in _mm_cvtps_pi8()
925 _mm_movemask_ps(__m128 __a) in _mm_movemask_ps() argument
927 return __builtin_ia32_movmskps(__a); in _mm_movemask_ps()