/*===---- xopintrin.h - XOP intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9 
10 #ifndef __X86INTRIN_H
11 #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
12 #endif
13 
14 #ifndef __XOPINTRIN_H
15 #define __XOPINTRIN_H
16 
17 #include <fma4intrin.h>
18 
/* Define the default attributes for the functions in this file.
 *
 * Both variants force inlining, suppress debug info and enable the "xop"
 * target feature for the function body; they differ only in the declared
 * minimum vector width (128 bits vs. 256 bits for the __m256* intrinsics).
 */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256)))
22 
23 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A,__m128i __B,__m128i __C)24 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
25 {
26   return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
27 }
28 
29 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi16(__m128i __A,__m128i __B,__m128i __C)30 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
31 {
32   return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
33 }
34 
35 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccsd_epi16(__m128i __A,__m128i __B,__m128i __C)36 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
37 {
38   return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
39 }
40 
41 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccd_epi16(__m128i __A,__m128i __B,__m128i __C)42 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
43 {
44   return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
45 }
46 
47 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi32(__m128i __A,__m128i __B,__m128i __C)48 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
49 {
50   return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
51 }
52 
53 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi32(__m128i __A,__m128i __B,__m128i __C)54 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
55 {
56   return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
57 }
58 
59 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccslo_epi32(__m128i __A,__m128i __B,__m128i __C)60 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
61 {
62   return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
63 }
64 
65 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macclo_epi32(__m128i __A,__m128i __B,__m128i __C)66 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
67 {
68   return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
69 }
70 
71 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccshi_epi32(__m128i __A,__m128i __B,__m128i __C)72 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
73 {
74   return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
75 }
76 
77 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macchi_epi32(__m128i __A,__m128i __B,__m128i __C)78 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
79 {
80   return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
81 }
82 
83 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddsd_epi16(__m128i __A,__m128i __B,__m128i __C)84 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
85 {
86   return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
87 }
88 
89 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddd_epi16(__m128i __A,__m128i __B,__m128i __C)90 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
91 {
92   return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
93 }
94 
95 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epi8(__m128i __A)96 _mm_haddw_epi8(__m128i __A)
97 {
98   return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
99 }
100 
101 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi8(__m128i __A)102 _mm_haddd_epi8(__m128i __A)
103 {
104   return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
105 }
106 
107 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi8(__m128i __A)108 _mm_haddq_epi8(__m128i __A)
109 {
110   return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
111 }
112 
113 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi16(__m128i __A)114 _mm_haddd_epi16(__m128i __A)
115 {
116   return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
117 }
118 
119 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi16(__m128i __A)120 _mm_haddq_epi16(__m128i __A)
121 {
122   return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
123 }
124 
125 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi32(__m128i __A)126 _mm_haddq_epi32(__m128i __A)
127 {
128   return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
129 }
130 
131 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epu8(__m128i __A)132 _mm_haddw_epu8(__m128i __A)
133 {
134   return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
135 }
136 
137 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu8(__m128i __A)138 _mm_haddd_epu8(__m128i __A)
139 {
140   return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
141 }
142 
143 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu8(__m128i __A)144 _mm_haddq_epu8(__m128i __A)
145 {
146   return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
147 }
148 
149 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu16(__m128i __A)150 _mm_haddd_epu16(__m128i __A)
151 {
152   return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
153 }
154 
155 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu16(__m128i __A)156 _mm_haddq_epu16(__m128i __A)
157 {
158   return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
159 }
160 
161 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu32(__m128i __A)162 _mm_haddq_epu32(__m128i __A)
163 {
164   return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
165 }
166 
167 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubw_epi8(__m128i __A)168 _mm_hsubw_epi8(__m128i __A)
169 {
170   return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
171 }
172 
173 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubd_epi16(__m128i __A)174 _mm_hsubd_epi16(__m128i __A)
175 {
176   return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
177 }
178 
179 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubq_epi32(__m128i __A)180 _mm_hsubq_epi32(__m128i __A)
181 {
182   return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
183 }
184 
185 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmov_si128(__m128i __A,__m128i __B,__m128i __C)186 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
187 {
188   return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));
189 }
190 
191 static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cmov_si256(__m256i __A,__m256i __B,__m256i __C)192 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
193 {
194   return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));
195 }
196 
197 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_perm_epi8(__m128i __A,__m128i __B,__m128i __C)198 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
199 {
200   return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
201 }
202 
203 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi8(__m128i __A,__m128i __B)204 _mm_rot_epi8(__m128i __A, __m128i __B)
205 {
206   return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
207 }
208 
209 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi16(__m128i __A,__m128i __B)210 _mm_rot_epi16(__m128i __A, __m128i __B)
211 {
212   return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
213 }
214 
215 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi32(__m128i __A,__m128i __B)216 _mm_rot_epi32(__m128i __A, __m128i __B)
217 {
218   return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
219 }
220 
221 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi64(__m128i __A,__m128i __B)222 _mm_rot_epi64(__m128i __A, __m128i __B)
223 {
224   return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
225 }
226 
/* VPROTB with count N passed straight to __builtin_ia32_vprotbi; A is viewed
 * as __v16qi. The expansion is fully parenthesized so that a postfix operator
 * at the use site cannot bind tighter than the result cast. */
#define _mm_roti_epi8(A, N) \
  ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))
229 
/* VPROTW with count N passed straight to __builtin_ia32_vprotwi; A is viewed
 * as __v8hi. The expansion is fully parenthesized so that a postfix operator
 * at the use site cannot bind tighter than the result cast. */
#define _mm_roti_epi16(A, N) \
  ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))
232 
/* VPROTD with count N passed straight to __builtin_ia32_vprotdi; A is viewed
 * as __v4si. The expansion is fully parenthesized so that a postfix operator
 * at the use site cannot bind tighter than the result cast. */
#define _mm_roti_epi32(A, N) \
  ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))
235 
/* VPROTQ with count N passed straight to __builtin_ia32_vprotqi; A is viewed
 * as __v2di. The expansion is fully parenthesized so that a postfix operator
 * at the use site cannot bind tighter than the result cast. */
#define _mm_roti_epi64(A, N) \
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))
238 
239 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A,__m128i __B)240 _mm_shl_epi8(__m128i __A, __m128i __B)
241 {
242   return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
243 }
244 
245 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi16(__m128i __A,__m128i __B)246 _mm_shl_epi16(__m128i __A, __m128i __B)
247 {
248   return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
249 }
250 
251 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi32(__m128i __A,__m128i __B)252 _mm_shl_epi32(__m128i __A, __m128i __B)
253 {
254   return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
255 }
256 
257 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi64(__m128i __A,__m128i __B)258 _mm_shl_epi64(__m128i __A, __m128i __B)
259 {
260   return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
261 }
262 
263 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi8(__m128i __A,__m128i __B)264 _mm_sha_epi8(__m128i __A, __m128i __B)
265 {
266   return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
267 }
268 
269 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi16(__m128i __A,__m128i __B)270 _mm_sha_epi16(__m128i __A, __m128i __B)
271 {
272   return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
273 }
274 
275 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi32(__m128i __A,__m128i __B)276 _mm_sha_epi32(__m128i __A, __m128i __B)
277 {
278   return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
279 }
280 
281 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi64(__m128i __A,__m128i __B)282 _mm_sha_epi64(__m128i __A, __m128i __B)
283 {
284   return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
285 }
286 
/* VPCOMUB: compares A and B (both viewed as __v16qi) through
 * __builtin_ia32_vpcomub using control value N. The expansion is fully
 * parenthesized so a postfix operator cannot bind tighter than the cast. */
#define _mm_com_epu8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), (N)))
290 
/* VPCOMUW: compares A and B (both viewed as __v8hi) through
 * __builtin_ia32_vpcomuw using control value N. The expansion is fully
 * parenthesized so a postfix operator cannot bind tighter than the cast. */
#define _mm_com_epu16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), (N)))
294 
/* VPCOMUD: compares A and B (both viewed as __v4si) through
 * __builtin_ia32_vpcomud using control value N. The expansion is fully
 * parenthesized so a postfix operator cannot bind tighter than the cast. */
#define _mm_com_epu32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), (N)))
298 
/* VPCOMUQ: compares A and B (both viewed as __v2di) through
 * __builtin_ia32_vpcomuq using control value N. The expansion is fully
 * parenthesized so a postfix operator cannot bind tighter than the cast. */
#define _mm_com_epu64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), (N)))
302 
/* VPCOMB: compares A and B (both viewed as __v16qi) through
 * __builtin_ia32_vpcomb using control value N. The expansion is fully
 * parenthesized so a postfix operator cannot bind tighter than the cast. */
#define _mm_com_epi8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)))
306 
/* VPCOMW: compares A and B (both viewed as __v8hi) through
 * __builtin_ia32_vpcomw using control value N. The expansion is fully
 * parenthesized so a postfix operator cannot bind tighter than the cast. */
#define _mm_com_epi16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)))
310 
/* VPCOMD: compares A and B (both viewed as __v4si) through
 * __builtin_ia32_vpcomd using control value N. The expansion is fully
 * parenthesized so a postfix operator cannot bind tighter than the cast. */
#define _mm_com_epi32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)))
314 
/* VPCOMQ: compares A and B (both viewed as __v2di) through
 * __builtin_ia32_vpcomq using control value N. The expansion is fully
 * parenthesized so a postfix operator cannot bind tighter than the cast. */
#define _mm_com_epi64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)))
318 
/* Control values for the N argument of the _mm_com_* macros; the named
 * _mm_com{lt,le,gt,ge,eq,neq,false,true}_* wrappers each pass one of them. */
#define _MM_PCOMCTRL_LT    0
#define _MM_PCOMCTRL_LE    1
#define _MM_PCOMCTRL_GT    2
#define _MM_PCOMCTRL_GE    3
#define _MM_PCOMCTRL_EQ    4
#define _MM_PCOMCTRL_NEQ   5
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE  7
327 
328 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu8(__m128i __A,__m128i __B)329 _mm_comlt_epu8(__m128i __A, __m128i __B)
330 {
331   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
332 }
333 
334 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu8(__m128i __A,__m128i __B)335 _mm_comle_epu8(__m128i __A, __m128i __B)
336 {
337   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
338 }
339 
340 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu8(__m128i __A,__m128i __B)341 _mm_comgt_epu8(__m128i __A, __m128i __B)
342 {
343   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
344 }
345 
346 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu8(__m128i __A,__m128i __B)347 _mm_comge_epu8(__m128i __A, __m128i __B)
348 {
349   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
350 }
351 
352 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu8(__m128i __A,__m128i __B)353 _mm_comeq_epu8(__m128i __A, __m128i __B)
354 {
355   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
356 }
357 
358 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu8(__m128i __A,__m128i __B)359 _mm_comneq_epu8(__m128i __A, __m128i __B)
360 {
361   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
362 }
363 
364 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu8(__m128i __A,__m128i __B)365 _mm_comfalse_epu8(__m128i __A, __m128i __B)
366 {
367   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
368 }
369 
370 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu8(__m128i __A,__m128i __B)371 _mm_comtrue_epu8(__m128i __A, __m128i __B)
372 {
373   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
374 }
375 
376 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu16(__m128i __A,__m128i __B)377 _mm_comlt_epu16(__m128i __A, __m128i __B)
378 {
379   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
380 }
381 
382 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu16(__m128i __A,__m128i __B)383 _mm_comle_epu16(__m128i __A, __m128i __B)
384 {
385   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
386 }
387 
388 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu16(__m128i __A,__m128i __B)389 _mm_comgt_epu16(__m128i __A, __m128i __B)
390 {
391   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
392 }
393 
394 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu16(__m128i __A,__m128i __B)395 _mm_comge_epu16(__m128i __A, __m128i __B)
396 {
397   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
398 }
399 
400 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu16(__m128i __A,__m128i __B)401 _mm_comeq_epu16(__m128i __A, __m128i __B)
402 {
403   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
404 }
405 
406 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu16(__m128i __A,__m128i __B)407 _mm_comneq_epu16(__m128i __A, __m128i __B)
408 {
409   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
410 }
411 
412 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu16(__m128i __A,__m128i __B)413 _mm_comfalse_epu16(__m128i __A, __m128i __B)
414 {
415   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
416 }
417 
418 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu16(__m128i __A,__m128i __B)419 _mm_comtrue_epu16(__m128i __A, __m128i __B)
420 {
421   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
422 }
423 
424 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu32(__m128i __A,__m128i __B)425 _mm_comlt_epu32(__m128i __A, __m128i __B)
426 {
427   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
428 }
429 
430 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu32(__m128i __A,__m128i __B)431 _mm_comle_epu32(__m128i __A, __m128i __B)
432 {
433   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
434 }
435 
436 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu32(__m128i __A,__m128i __B)437 _mm_comgt_epu32(__m128i __A, __m128i __B)
438 {
439   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
440 }
441 
442 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu32(__m128i __A,__m128i __B)443 _mm_comge_epu32(__m128i __A, __m128i __B)
444 {
445   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
446 }
447 
448 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu32(__m128i __A,__m128i __B)449 _mm_comeq_epu32(__m128i __A, __m128i __B)
450 {
451   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
452 }
453 
454 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu32(__m128i __A,__m128i __B)455 _mm_comneq_epu32(__m128i __A, __m128i __B)
456 {
457   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
458 }
459 
460 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu32(__m128i __A,__m128i __B)461 _mm_comfalse_epu32(__m128i __A, __m128i __B)
462 {
463   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
464 }
465 
466 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu32(__m128i __A,__m128i __B)467 _mm_comtrue_epu32(__m128i __A, __m128i __B)
468 {
469   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
470 }
471 
472 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu64(__m128i __A,__m128i __B)473 _mm_comlt_epu64(__m128i __A, __m128i __B)
474 {
475   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
476 }
477 
478 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu64(__m128i __A,__m128i __B)479 _mm_comle_epu64(__m128i __A, __m128i __B)
480 {
481   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
482 }
483 
484 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu64(__m128i __A,__m128i __B)485 _mm_comgt_epu64(__m128i __A, __m128i __B)
486 {
487   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
488 }
489 
490 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu64(__m128i __A,__m128i __B)491 _mm_comge_epu64(__m128i __A, __m128i __B)
492 {
493   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
494 }
495 
496 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu64(__m128i __A,__m128i __B)497 _mm_comeq_epu64(__m128i __A, __m128i __B)
498 {
499   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
500 }
501 
502 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu64(__m128i __A,__m128i __B)503 _mm_comneq_epu64(__m128i __A, __m128i __B)
504 {
505   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
506 }
507 
508 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu64(__m128i __A,__m128i __B)509 _mm_comfalse_epu64(__m128i __A, __m128i __B)
510 {
511   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
512 }
513 
514 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu64(__m128i __A,__m128i __B)515 _mm_comtrue_epu64(__m128i __A, __m128i __B)
516 {
517   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
518 }
519 
520 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi8(__m128i __A,__m128i __B)521 _mm_comlt_epi8(__m128i __A, __m128i __B)
522 {
523   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
524 }
525 
526 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi8(__m128i __A,__m128i __B)527 _mm_comle_epi8(__m128i __A, __m128i __B)
528 {
529   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
530 }
531 
532 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi8(__m128i __A,__m128i __B)533 _mm_comgt_epi8(__m128i __A, __m128i __B)
534 {
535   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
536 }
537 
538 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi8(__m128i __A,__m128i __B)539 _mm_comge_epi8(__m128i __A, __m128i __B)
540 {
541   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
542 }
543 
544 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi8(__m128i __A,__m128i __B)545 _mm_comeq_epi8(__m128i __A, __m128i __B)
546 {
547   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
548 }
549 
550 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi8(__m128i __A,__m128i __B)551 _mm_comneq_epi8(__m128i __A, __m128i __B)
552 {
553   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
554 }
555 
556 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi8(__m128i __A,__m128i __B)557 _mm_comfalse_epi8(__m128i __A, __m128i __B)
558 {
559   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
560 }
561 
562 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi8(__m128i __A,__m128i __B)563 _mm_comtrue_epi8(__m128i __A, __m128i __B)
564 {
565   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
566 }
567 
568 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi16(__m128i __A,__m128i __B)569 _mm_comlt_epi16(__m128i __A, __m128i __B)
570 {
571   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
572 }
573 
574 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi16(__m128i __A,__m128i __B)575 _mm_comle_epi16(__m128i __A, __m128i __B)
576 {
577   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
578 }
579 
580 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi16(__m128i __A,__m128i __B)581 _mm_comgt_epi16(__m128i __A, __m128i __B)
582 {
583   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
584 }
585 
586 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi16(__m128i __A,__m128i __B)587 _mm_comge_epi16(__m128i __A, __m128i __B)
588 {
589   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
590 }
591 
592 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi16(__m128i __A,__m128i __B)593 _mm_comeq_epi16(__m128i __A, __m128i __B)
594 {
595   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
596 }
597 
598 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi16(__m128i __A,__m128i __B)599 _mm_comneq_epi16(__m128i __A, __m128i __B)
600 {
601   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
602 }
603 
604 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi16(__m128i __A,__m128i __B)605 _mm_comfalse_epi16(__m128i __A, __m128i __B)
606 {
607   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
608 }
609 
610 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi16(__m128i __A,__m128i __B)611 _mm_comtrue_epi16(__m128i __A, __m128i __B)
612 {
613   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
614 }
615 
616 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi32(__m128i __A,__m128i __B)617 _mm_comlt_epi32(__m128i __A, __m128i __B)
618 {
619   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
620 }
621 
622 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi32(__m128i __A,__m128i __B)623 _mm_comle_epi32(__m128i __A, __m128i __B)
624 {
625   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
626 }
627 
628 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi32(__m128i __A,__m128i __B)629 _mm_comgt_epi32(__m128i __A, __m128i __B)
630 {
631   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
632 }
633 
634 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi32(__m128i __A,__m128i __B)635 _mm_comge_epi32(__m128i __A, __m128i __B)
636 {
637   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
638 }
639 
640 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi32(__m128i __A,__m128i __B)641 _mm_comeq_epi32(__m128i __A, __m128i __B)
642 {
643   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
644 }
645 
646 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi32(__m128i __A,__m128i __B)647 _mm_comneq_epi32(__m128i __A, __m128i __B)
648 {
649   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
650 }
651 
652 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi32(__m128i __A,__m128i __B)653 _mm_comfalse_epi32(__m128i __A, __m128i __B)
654 {
655   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
656 }
657 
658 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi32(__m128i __A,__m128i __B)659 _mm_comtrue_epi32(__m128i __A, __m128i __B)
660 {
661   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
662 }
663 
664 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi64(__m128i __A,__m128i __B)665 _mm_comlt_epi64(__m128i __A, __m128i __B)
666 {
667   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
668 }
669 
670 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi64(__m128i __A,__m128i __B)671 _mm_comle_epi64(__m128i __A, __m128i __B)
672 {
673   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
674 }
675 
676 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi64(__m128i __A,__m128i __B)677 _mm_comgt_epi64(__m128i __A, __m128i __B)
678 {
679   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
680 }
681 
682 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi64(__m128i __A,__m128i __B)683 _mm_comge_epi64(__m128i __A, __m128i __B)
684 {
685   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
686 }
687 
688 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi64(__m128i __A,__m128i __B)689 _mm_comeq_epi64(__m128i __A, __m128i __B)
690 {
691   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
692 }
693 
694 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi64(__m128i __A,__m128i __B)695 _mm_comneq_epi64(__m128i __A, __m128i __B)
696 {
697   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
698 }
699 
700 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi64(__m128i __A,__m128i __B)701 _mm_comfalse_epi64(__m128i __A, __m128i __B)
702 {
703   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
704 }
705 
706 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi64(__m128i __A,__m128i __B)707 _mm_comtrue_epi64(__m128i __A, __m128i __B)
708 {
709   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
710 }
711 
/* VPERMIL2PD: passes X and Y (viewed as __v2df), the selector C (viewed as
 * __v2di) and the control I to __builtin_ia32_vpermil2pd. The expansion is
 * fully parenthesized so a postfix operator cannot bind tighter than the
 * result cast. */
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), \
                                      (__v2di)(__m128i)(C), (I)))
716 
/* 256-bit VPERMIL2PD: passes X and Y (viewed as __v4df), the selector C
 * (viewed as __v4di) and the control I to __builtin_ia32_vpermil2pd256. The
 * expansion is fully parenthesized so a postfix operator cannot bind tighter
 * than the result cast. */
#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                         (__v4df)(__m256d)(Y), \
                                         (__v4di)(__m256i)(C), (I)))
721 
/* VPERMIL2PS: passes X and Y (viewed as __v4sf), the selector C (viewed as
 * __v4si) and the control I to __builtin_ia32_vpermil2ps. The expansion is
 * fully parenthesized so a postfix operator cannot bind tighter than the
 * result cast. */
#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
                                     (__v4si)(__m128i)(C), (I)))
725 
/* 256-bit VPERMIL2PS: passes X and Y (viewed as __v8sf), the selector C
 * (viewed as __v8si) and the control I to __builtin_ia32_vpermil2ps256. The
 * expansion is fully parenthesized so a postfix operator cannot bind tighter
 * than the result cast. */
#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                        (__v8sf)(__m256)(Y), \
                                        (__v8si)(__m256i)(C), (I)))
730 
731 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)732 _mm_frcz_ss(__m128 __A)
733 {
734   return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
735 }
736 
737 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_sd(__m128d __A)738 _mm_frcz_sd(__m128d __A)
739 {
740   return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
741 }
742 
743 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ps(__m128 __A)744 _mm_frcz_ps(__m128 __A)
745 {
746   return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
747 }
748 
749 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_pd(__m128d __A)750 _mm_frcz_pd(__m128d __A)
751 {
752   return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
753 }
754 
755 static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_frcz_ps(__m256 __A)756 _mm256_frcz_ps(__m256 __A)
757 {
758   return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
759 }
760 
761 static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_frcz_pd(__m256d __A)762 _mm256_frcz_pd(__m256d __A)
763 {
764   return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
765 }
766 
767 #undef __DEFAULT_FN_ATTRS
768 #undef __DEFAULT_FN_ATTRS256
769 
770 #endif /* __XOPINTRIN_H */
771