/*===---- xopintrin.h - XOP intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23 
24 #ifndef __X86INTRIN_H
25 #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
26 #endif
27 
28 #ifndef __XOPINTRIN_H
29 #define __XOPINTRIN_H
30 
31 #include <fma4intrin.h>
32 
/* Default attributes shared by every intrinsic function defined in this file:
 * always inlined, marked __nodebug__, and compiled for the "xop" target
 * feature. The macro is #undef'd again at the end of the header. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("xop")))
35 
36 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A,__m128i __B,__m128i __C)37 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
38 {
39   return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
40 }
41 
42 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi16(__m128i __A,__m128i __B,__m128i __C)43 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
44 {
45   return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
46 }
47 
48 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccsd_epi16(__m128i __A,__m128i __B,__m128i __C)49 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
50 {
51   return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
52 }
53 
54 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccd_epi16(__m128i __A,__m128i __B,__m128i __C)55 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
56 {
57   return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
58 }
59 
60 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi32(__m128i __A,__m128i __B,__m128i __C)61 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
62 {
63   return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
64 }
65 
66 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi32(__m128i __A,__m128i __B,__m128i __C)67 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
68 {
69   return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
70 }
71 
72 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccslo_epi32(__m128i __A,__m128i __B,__m128i __C)73 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
74 {
75   return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
76 }
77 
78 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macclo_epi32(__m128i __A,__m128i __B,__m128i __C)79 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
80 {
81   return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
82 }
83 
84 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccshi_epi32(__m128i __A,__m128i __B,__m128i __C)85 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
86 {
87   return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
88 }
89 
90 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macchi_epi32(__m128i __A,__m128i __B,__m128i __C)91 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
92 {
93   return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
94 }
95 
96 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddsd_epi16(__m128i __A,__m128i __B,__m128i __C)97 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
98 {
99   return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
100 }
101 
102 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddd_epi16(__m128i __A,__m128i __B,__m128i __C)103 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
104 {
105   return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
106 }
107 
108 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epi8(__m128i __A)109 _mm_haddw_epi8(__m128i __A)
110 {
111   return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
112 }
113 
114 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi8(__m128i __A)115 _mm_haddd_epi8(__m128i __A)
116 {
117   return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
118 }
119 
120 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi8(__m128i __A)121 _mm_haddq_epi8(__m128i __A)
122 {
123   return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
124 }
125 
126 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi16(__m128i __A)127 _mm_haddd_epi16(__m128i __A)
128 {
129   return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
130 }
131 
132 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi16(__m128i __A)133 _mm_haddq_epi16(__m128i __A)
134 {
135   return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
136 }
137 
138 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi32(__m128i __A)139 _mm_haddq_epi32(__m128i __A)
140 {
141   return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
142 }
143 
144 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epu8(__m128i __A)145 _mm_haddw_epu8(__m128i __A)
146 {
147   return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
148 }
149 
150 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu8(__m128i __A)151 _mm_haddd_epu8(__m128i __A)
152 {
153   return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
154 }
155 
156 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu8(__m128i __A)157 _mm_haddq_epu8(__m128i __A)
158 {
159   return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
160 }
161 
162 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu16(__m128i __A)163 _mm_haddd_epu16(__m128i __A)
164 {
165   return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
166 }
167 
168 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu16(__m128i __A)169 _mm_haddq_epu16(__m128i __A)
170 {
171   return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
172 }
173 
174 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu32(__m128i __A)175 _mm_haddq_epu32(__m128i __A)
176 {
177   return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
178 }
179 
180 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubw_epi8(__m128i __A)181 _mm_hsubw_epi8(__m128i __A)
182 {
183   return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
184 }
185 
186 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubd_epi16(__m128i __A)187 _mm_hsubd_epi16(__m128i __A)
188 {
189   return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
190 }
191 
192 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubq_epi32(__m128i __A)193 _mm_hsubq_epi32(__m128i __A)
194 {
195   return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
196 }
197 
198 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmov_si128(__m128i __A,__m128i __B,__m128i __C)199 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
200 {
201   return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
202 }
203 
204 static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cmov_si256(__m256i __A,__m256i __B,__m256i __C)205 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
206 {
207   return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
208 }
209 
210 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_perm_epi8(__m128i __A,__m128i __B,__m128i __C)211 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
212 {
213   return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
214 }
215 
216 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi8(__m128i __A,__m128i __B)217 _mm_rot_epi8(__m128i __A, __m128i __B)
218 {
219   return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
220 }
221 
222 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi16(__m128i __A,__m128i __B)223 _mm_rot_epi16(__m128i __A, __m128i __B)
224 {
225   return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
226 }
227 
228 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi32(__m128i __A,__m128i __B)229 _mm_rot_epi32(__m128i __A, __m128i __B)
230 {
231   return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
232 }
233 
234 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi64(__m128i __A,__m128i __B)235 _mm_rot_epi64(__m128i __A, __m128i __B)
236 {
237   return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
238 }
239 
/* Expands to __builtin_ia32_vprotbi with A viewed as __v16qi; N is forwarded
 * unchanged as the second builtin argument. Yields an __m128i value. */
#define _mm_roti_epi8(A, N) \
  ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))
242 
/* Expands to __builtin_ia32_vprotwi with A viewed as __v8hi; N is forwarded
 * unchanged as the second builtin argument. Yields an __m128i value. */
#define _mm_roti_epi16(A, N) \
  ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))
245 
/* Expands to __builtin_ia32_vprotdi with A viewed as __v4si; N is forwarded
 * unchanged as the second builtin argument. Yields an __m128i value. */
#define _mm_roti_epi32(A, N) \
  ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))
248 
/* Expands to __builtin_ia32_vprotqi with A viewed as __v2di; N is forwarded
 * unchanged as the second builtin argument. Yields an __m128i value. */
#define _mm_roti_epi64(A, N) \
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))
251 
252 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A,__m128i __B)253 _mm_shl_epi8(__m128i __A, __m128i __B)
254 {
255   return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
256 }
257 
258 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi16(__m128i __A,__m128i __B)259 _mm_shl_epi16(__m128i __A, __m128i __B)
260 {
261   return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
262 }
263 
264 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi32(__m128i __A,__m128i __B)265 _mm_shl_epi32(__m128i __A, __m128i __B)
266 {
267   return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
268 }
269 
270 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi64(__m128i __A,__m128i __B)271 _mm_shl_epi64(__m128i __A, __m128i __B)
272 {
273   return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
274 }
275 
276 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi8(__m128i __A,__m128i __B)277 _mm_sha_epi8(__m128i __A, __m128i __B)
278 {
279   return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
280 }
281 
282 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi16(__m128i __A,__m128i __B)283 _mm_sha_epi16(__m128i __A, __m128i __B)
284 {
285   return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
286 }
287 
288 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi32(__m128i __A,__m128i __B)289 _mm_sha_epi32(__m128i __A, __m128i __B)
290 {
291   return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
292 }
293 
294 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi64(__m128i __A,__m128i __B)295 _mm_sha_epi64(__m128i __A, __m128i __B)
296 {
297   return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
298 }
299 
/* Expands to __builtin_ia32_vpcomub on A and B (both viewed as __v16qi); N is
 * forwarded as the third builtin argument (see the _MM_PCOMCTRL_* values). */
#define _mm_com_epu8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), (N)))
303 
/* Expands to __builtin_ia32_vpcomuw on A and B (both viewed as __v8hi); N is
 * forwarded as the third builtin argument (see the _MM_PCOMCTRL_* values). */
#define _mm_com_epu16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), (N)))
307 
/* Expands to __builtin_ia32_vpcomud on A and B (both viewed as __v4si); N is
 * forwarded as the third builtin argument (see the _MM_PCOMCTRL_* values). */
#define _mm_com_epu32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), (N)))
311 
/* Expands to __builtin_ia32_vpcomuq on A and B (both viewed as __v2di); N is
 * forwarded as the third builtin argument (see the _MM_PCOMCTRL_* values). */
#define _mm_com_epu64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), (N)))
315 
/* Expands to __builtin_ia32_vpcomb on A and B (both viewed as __v16qi); N is
 * forwarded as the third builtin argument (see the _MM_PCOMCTRL_* values). */
#define _mm_com_epi8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)))
319 
/* Expands to __builtin_ia32_vpcomw on A and B (both viewed as __v8hi); N is
 * forwarded as the third builtin argument (see the _MM_PCOMCTRL_* values). */
#define _mm_com_epi16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)))
323 
/* Expands to __builtin_ia32_vpcomd on A and B (both viewed as __v4si); N is
 * forwarded as the third builtin argument (see the _MM_PCOMCTRL_* values). */
#define _mm_com_epi32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)))
327 
/* Expands to __builtin_ia32_vpcomq on A and B (both viewed as __v2di); N is
 * forwarded as the third builtin argument (see the _MM_PCOMCTRL_* values). */
#define _mm_com_epi64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)))
331 
/* Predicate selectors passed as the last argument of the _mm_com_* macros.
 * They stay preprocessor macros so they remain usable in #if expressions. */
#define _MM_PCOMCTRL_LT    0
#define _MM_PCOMCTRL_LE    1
#define _MM_PCOMCTRL_GT    2
#define _MM_PCOMCTRL_GE    3
#define _MM_PCOMCTRL_EQ    4
#define _MM_PCOMCTRL_NEQ   5
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE  7
340 
341 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu8(__m128i __A,__m128i __B)342 _mm_comlt_epu8(__m128i __A, __m128i __B)
343 {
344   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
345 }
346 
347 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu8(__m128i __A,__m128i __B)348 _mm_comle_epu8(__m128i __A, __m128i __B)
349 {
350   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
351 }
352 
353 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu8(__m128i __A,__m128i __B)354 _mm_comgt_epu8(__m128i __A, __m128i __B)
355 {
356   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
357 }
358 
359 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu8(__m128i __A,__m128i __B)360 _mm_comge_epu8(__m128i __A, __m128i __B)
361 {
362   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
363 }
364 
365 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu8(__m128i __A,__m128i __B)366 _mm_comeq_epu8(__m128i __A, __m128i __B)
367 {
368   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
369 }
370 
371 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu8(__m128i __A,__m128i __B)372 _mm_comneq_epu8(__m128i __A, __m128i __B)
373 {
374   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
375 }
376 
377 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu8(__m128i __A,__m128i __B)378 _mm_comfalse_epu8(__m128i __A, __m128i __B)
379 {
380   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
381 }
382 
383 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu8(__m128i __A,__m128i __B)384 _mm_comtrue_epu8(__m128i __A, __m128i __B)
385 {
386   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
387 }
388 
389 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu16(__m128i __A,__m128i __B)390 _mm_comlt_epu16(__m128i __A, __m128i __B)
391 {
392   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
393 }
394 
395 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu16(__m128i __A,__m128i __B)396 _mm_comle_epu16(__m128i __A, __m128i __B)
397 {
398   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
399 }
400 
401 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu16(__m128i __A,__m128i __B)402 _mm_comgt_epu16(__m128i __A, __m128i __B)
403 {
404   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
405 }
406 
407 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu16(__m128i __A,__m128i __B)408 _mm_comge_epu16(__m128i __A, __m128i __B)
409 {
410   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
411 }
412 
413 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu16(__m128i __A,__m128i __B)414 _mm_comeq_epu16(__m128i __A, __m128i __B)
415 {
416   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
417 }
418 
419 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu16(__m128i __A,__m128i __B)420 _mm_comneq_epu16(__m128i __A, __m128i __B)
421 {
422   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
423 }
424 
425 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu16(__m128i __A,__m128i __B)426 _mm_comfalse_epu16(__m128i __A, __m128i __B)
427 {
428   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
429 }
430 
431 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu16(__m128i __A,__m128i __B)432 _mm_comtrue_epu16(__m128i __A, __m128i __B)
433 {
434   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
435 }
436 
437 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu32(__m128i __A,__m128i __B)438 _mm_comlt_epu32(__m128i __A, __m128i __B)
439 {
440   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
441 }
442 
443 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu32(__m128i __A,__m128i __B)444 _mm_comle_epu32(__m128i __A, __m128i __B)
445 {
446   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
447 }
448 
449 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu32(__m128i __A,__m128i __B)450 _mm_comgt_epu32(__m128i __A, __m128i __B)
451 {
452   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
453 }
454 
455 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu32(__m128i __A,__m128i __B)456 _mm_comge_epu32(__m128i __A, __m128i __B)
457 {
458   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
459 }
460 
461 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu32(__m128i __A,__m128i __B)462 _mm_comeq_epu32(__m128i __A, __m128i __B)
463 {
464   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
465 }
466 
467 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu32(__m128i __A,__m128i __B)468 _mm_comneq_epu32(__m128i __A, __m128i __B)
469 {
470   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
471 }
472 
473 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu32(__m128i __A,__m128i __B)474 _mm_comfalse_epu32(__m128i __A, __m128i __B)
475 {
476   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
477 }
478 
479 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu32(__m128i __A,__m128i __B)480 _mm_comtrue_epu32(__m128i __A, __m128i __B)
481 {
482   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
483 }
484 
485 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu64(__m128i __A,__m128i __B)486 _mm_comlt_epu64(__m128i __A, __m128i __B)
487 {
488   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
489 }
490 
491 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu64(__m128i __A,__m128i __B)492 _mm_comle_epu64(__m128i __A, __m128i __B)
493 {
494   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
495 }
496 
497 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu64(__m128i __A,__m128i __B)498 _mm_comgt_epu64(__m128i __A, __m128i __B)
499 {
500   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
501 }
502 
503 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu64(__m128i __A,__m128i __B)504 _mm_comge_epu64(__m128i __A, __m128i __B)
505 {
506   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
507 }
508 
509 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu64(__m128i __A,__m128i __B)510 _mm_comeq_epu64(__m128i __A, __m128i __B)
511 {
512   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
513 }
514 
515 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu64(__m128i __A,__m128i __B)516 _mm_comneq_epu64(__m128i __A, __m128i __B)
517 {
518   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
519 }
520 
521 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu64(__m128i __A,__m128i __B)522 _mm_comfalse_epu64(__m128i __A, __m128i __B)
523 {
524   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
525 }
526 
527 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu64(__m128i __A,__m128i __B)528 _mm_comtrue_epu64(__m128i __A, __m128i __B)
529 {
530   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
531 }
532 
533 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi8(__m128i __A,__m128i __B)534 _mm_comlt_epi8(__m128i __A, __m128i __B)
535 {
536   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
537 }
538 
539 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi8(__m128i __A,__m128i __B)540 _mm_comle_epi8(__m128i __A, __m128i __B)
541 {
542   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
543 }
544 
545 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi8(__m128i __A,__m128i __B)546 _mm_comgt_epi8(__m128i __A, __m128i __B)
547 {
548   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
549 }
550 
551 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi8(__m128i __A,__m128i __B)552 _mm_comge_epi8(__m128i __A, __m128i __B)
553 {
554   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
555 }
556 
557 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi8(__m128i __A,__m128i __B)558 _mm_comeq_epi8(__m128i __A, __m128i __B)
559 {
560   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
561 }
562 
563 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi8(__m128i __A,__m128i __B)564 _mm_comneq_epi8(__m128i __A, __m128i __B)
565 {
566   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
567 }
568 
569 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi8(__m128i __A,__m128i __B)570 _mm_comfalse_epi8(__m128i __A, __m128i __B)
571 {
572   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
573 }
574 
575 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi8(__m128i __A,__m128i __B)576 _mm_comtrue_epi8(__m128i __A, __m128i __B)
577 {
578   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
579 }
580 
581 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi16(__m128i __A,__m128i __B)582 _mm_comlt_epi16(__m128i __A, __m128i __B)
583 {
584   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
585 }
586 
587 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi16(__m128i __A,__m128i __B)588 _mm_comle_epi16(__m128i __A, __m128i __B)
589 {
590   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
591 }
592 
593 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi16(__m128i __A,__m128i __B)594 _mm_comgt_epi16(__m128i __A, __m128i __B)
595 {
596   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
597 }
598 
599 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi16(__m128i __A,__m128i __B)600 _mm_comge_epi16(__m128i __A, __m128i __B)
601 {
602   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
603 }
604 
605 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi16(__m128i __A,__m128i __B)606 _mm_comeq_epi16(__m128i __A, __m128i __B)
607 {
608   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
609 }
610 
611 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi16(__m128i __A,__m128i __B)612 _mm_comneq_epi16(__m128i __A, __m128i __B)
613 {
614   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
615 }
616 
617 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi16(__m128i __A,__m128i __B)618 _mm_comfalse_epi16(__m128i __A, __m128i __B)
619 {
620   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
621 }
622 
623 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi16(__m128i __A,__m128i __B)624 _mm_comtrue_epi16(__m128i __A, __m128i __B)
625 {
626   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
627 }
628 
629 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi32(__m128i __A,__m128i __B)630 _mm_comlt_epi32(__m128i __A, __m128i __B)
631 {
632   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
633 }
634 
635 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi32(__m128i __A,__m128i __B)636 _mm_comle_epi32(__m128i __A, __m128i __B)
637 {
638   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
639 }
640 
641 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi32(__m128i __A,__m128i __B)642 _mm_comgt_epi32(__m128i __A, __m128i __B)
643 {
644   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
645 }
646 
647 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi32(__m128i __A,__m128i __B)648 _mm_comge_epi32(__m128i __A, __m128i __B)
649 {
650   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
651 }
652 
653 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi32(__m128i __A,__m128i __B)654 _mm_comeq_epi32(__m128i __A, __m128i __B)
655 {
656   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
657 }
658 
659 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi32(__m128i __A,__m128i __B)660 _mm_comneq_epi32(__m128i __A, __m128i __B)
661 {
662   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
663 }
664 
665 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi32(__m128i __A,__m128i __B)666 _mm_comfalse_epi32(__m128i __A, __m128i __B)
667 {
668   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
669 }
670 
671 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi32(__m128i __A,__m128i __B)672 _mm_comtrue_epi32(__m128i __A, __m128i __B)
673 {
674   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
675 }
676 
677 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi64(__m128i __A,__m128i __B)678 _mm_comlt_epi64(__m128i __A, __m128i __B)
679 {
680   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
681 }
682 
683 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi64(__m128i __A,__m128i __B)684 _mm_comle_epi64(__m128i __A, __m128i __B)
685 {
686   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
687 }
688 
689 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi64(__m128i __A,__m128i __B)690 _mm_comgt_epi64(__m128i __A, __m128i __B)
691 {
692   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
693 }
694 
695 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi64(__m128i __A,__m128i __B)696 _mm_comge_epi64(__m128i __A, __m128i __B)
697 {
698   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
699 }
700 
701 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi64(__m128i __A,__m128i __B)702 _mm_comeq_epi64(__m128i __A, __m128i __B)
703 {
704   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
705 }
706 
707 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi64(__m128i __A,__m128i __B)708 _mm_comneq_epi64(__m128i __A, __m128i __B)
709 {
710   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
711 }
712 
713 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi64(__m128i __A,__m128i __B)714 _mm_comfalse_epi64(__m128i __A, __m128i __B)
715 {
716   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
717 }
718 
719 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi64(__m128i __A,__m128i __B)720 _mm_comtrue_epi64(__m128i __A, __m128i __B)
721 {
722   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
723 }
724 
/* Expands to __builtin_ia32_vpermil2pd with X and Y viewed as __v2df and the
 * selector C viewed as __v2di; I is forwarded unchanged as the last builtin
 * argument. Yields an __m128d value. */
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), \
                                      (__v2di)(__m128i)(C), (I)))
729 
/* Expands to __builtin_ia32_vpermil2pd256 with X and Y viewed as __v4df and
 * the selector C viewed as __v4di; I is forwarded unchanged as the last
 * builtin argument. Yields an __m256d value. */
#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                         (__v4df)(__m256d)(Y), \
                                         (__v4di)(__m256i)(C), (I)))
734 
/* Expands to __builtin_ia32_vpermil2ps with X and Y viewed as __v4sf and the
 * selector C viewed as __v4si; I is forwarded unchanged as the last builtin
 * argument. Yields an __m128 value. */
#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), \
                                     (__v4sf)(__m128)(Y), \
                                     (__v4si)(__m128i)(C), (I)))
738 
/* Expands to __builtin_ia32_vpermil2ps256 with X and Y viewed as __v8sf and
 * the selector C viewed as __v8si; I is forwarded unchanged as the last
 * builtin argument. Yields an __m256 value. */
#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                        (__v8sf)(__m256)(Y), \
                                        (__v8si)(__m256i)(C), (I)))
743 
744 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)745 _mm_frcz_ss(__m128 __A)
746 {
747   return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
748 }
749 
750 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_sd(__m128d __A)751 _mm_frcz_sd(__m128d __A)
752 {
753   return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
754 }
755 
756 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ps(__m128 __A)757 _mm_frcz_ps(__m128 __A)
758 {
759   return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
760 }
761 
762 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_pd(__m128d __A)763 _mm_frcz_pd(__m128d __A)
764 {
765   return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
766 }
767 
768 static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_frcz_ps(__m256 __A)769 _mm256_frcz_ps(__m256 __A)
770 {
771   return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
772 }
773 
774 static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_frcz_pd(__m256d __A)775 _mm256_frcz_pd(__m256d __A)
776 {
777   return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
778 }
779 
780 #undef __DEFAULT_FN_ATTRS
781 
782 #endif /* __XOPINTRIN_H */
783