1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512dq -disable-peephole | FileCheck %s --check-prefix=AVX512
5
6;
7; Float Comparisons
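8; With SSE only equal/not-equal/ordered/unordered can be safely commuted (load folded); AVX/AVX512 also fold the load for ueq/one, and the 512-bit AVX512 lt test folds it as a gt compare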
9;
10
; fcmp oeq on <4 x float> with the loaded vector as the first operand and the
; result sign-extended to <4 x i32>. All three check prefixes expect the load
; to be folded as the memory operand of the compare.
11define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
12; SSE-LABEL: commute_cmpps_eq:
13; SSE:       # %bb.0:
14; SSE-NEXT:    cmpeqps (%rdi), %xmm0
15; SSE-NEXT:    retq
16;
17; AVX-LABEL: commute_cmpps_eq:
18; AVX:       # %bb.0:
19; AVX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
20; AVX-NEXT:    retq
21;
22; AVX512-LABEL: commute_cmpps_eq:
23; AVX512:       # %bb.0:
24; AVX512-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
25; AVX512-NEXT:    retq
26  %1 = load <4 x float>, <4 x float>* %a0
27  %2 = fcmp oeq <4 x float> %1, %a1
28  %3 = sext <4 x i1> %2 to <4 x i32>
29  ret <4 x i32> %3
30}
31
; fcmp une on <4 x float> with the loaded vector as the first operand and the
; result sign-extended to <4 x i32>. All three check prefixes expect a single
; (v)cmpneqps with the load folded as its memory operand.
32define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
33; SSE-LABEL: commute_cmpps_ne:
34; SSE:       # %bb.0:
35; SSE-NEXT:    cmpneqps (%rdi), %xmm0
36; SSE-NEXT:    retq
37;
38; AVX-LABEL: commute_cmpps_ne:
39; AVX:       # %bb.0:
40; AVX-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
41; AVX-NEXT:    retq
42;
43; AVX512-LABEL: commute_cmpps_ne:
44; AVX512:       # %bb.0:
45; AVX512-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
46; AVX512-NEXT:    retq
47  %1 = load <4 x float>, <4 x float>* %a0
48  %2 = fcmp une <4 x float> %1, %a1
49  %3 = sext <4 x i1> %2 to <4 x i32>
50  ret <4 x i32> %3
51}
52
; fcmp ord on <4 x float> with the loaded vector as the first operand and the
; result sign-extended to <4 x i32>. All three check prefixes expect a single
; (v)cmpordps with the load folded as its memory operand.
53define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
54; SSE-LABEL: commute_cmpps_ord:
55; SSE:       # %bb.0:
56; SSE-NEXT:    cmpordps (%rdi), %xmm0
57; SSE-NEXT:    retq
58;
59; AVX-LABEL: commute_cmpps_ord:
60; AVX:       # %bb.0:
61; AVX-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
62; AVX-NEXT:    retq
63;
64; AVX512-LABEL: commute_cmpps_ord:
65; AVX512:       # %bb.0:
66; AVX512-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
67; AVX512-NEXT:    retq
68  %1 = load <4 x float>, <4 x float>* %a0
69  %2 = fcmp ord <4 x float> %1, %a1
70  %3 = sext <4 x i1> %2 to <4 x i32>
71  ret <4 x i32> %3
72}
73
; fcmp uno on <4 x float> with the loaded vector as the first operand and the
; result sign-extended to <4 x i32>. All three check prefixes expect a single
; (v)cmpunordps with the load folded as its memory operand.
74define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
75; SSE-LABEL: commute_cmpps_uno:
76; SSE:       # %bb.0:
77; SSE-NEXT:    cmpunordps (%rdi), %xmm0
78; SSE-NEXT:    retq
79;
80; AVX-LABEL: commute_cmpps_uno:
81; AVX:       # %bb.0:
82; AVX-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
83; AVX-NEXT:    retq
84;
85; AVX512-LABEL: commute_cmpps_uno:
86; AVX512:       # %bb.0:
87; AVX512-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
88; AVX512-NEXT:    retq
89  %1 = load <4 x float>, <4 x float>* %a0
90  %2 = fcmp uno <4 x float> %1, %a1
91  %3 = sext <4 x i1> %2 to <4 x i32>
92  ret <4 x i32> %3
93}
94
; fcmp ueq on <4 x float> with the loaded vector as the first operand and the
; result sign-extended to <4 x i32>. The SSE checks expect a separate load and
; a cmpeqps/cmpunordps pair combined with orps; the AVX and AVX512 checks
; expect a single vcmpeq_uqps with the load folded as its memory operand.
95define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
96; SSE-LABEL: commute_cmpps_ueq:
97; SSE:       # %bb.0:
98; SSE-NEXT:    movaps (%rdi), %xmm1
99; SSE-NEXT:    movaps %xmm1, %xmm2
100; SSE-NEXT:    cmpeqps %xmm0, %xmm2
101; SSE-NEXT:    cmpunordps %xmm1, %xmm0
102; SSE-NEXT:    orps %xmm2, %xmm0
103; SSE-NEXT:    retq
104;
105; AVX-LABEL: commute_cmpps_ueq:
106; AVX:       # %bb.0:
107; AVX-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %xmm0
108; AVX-NEXT:    retq
109;
110; AVX512-LABEL: commute_cmpps_ueq:
111; AVX512:       # %bb.0:
112; AVX512-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %xmm0
113; AVX512-NEXT:    retq
114  %1 = load <4 x float>, <4 x float>* %a0
115  %2 = fcmp ueq <4 x float> %1, %a1
116  %3 = sext <4 x i1> %2 to <4 x i32>
117  ret <4 x i32> %3
118}
119
; fcmp one on <4 x float> with the loaded vector as the first operand and the
; result sign-extended to <4 x i32>. The SSE checks expect a separate load and
; a cmpneqps/cmpordps pair combined with andps; the AVX and AVX512 checks
; expect a single vcmpneq_oqps with the load folded as its memory operand.
120define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
121; SSE-LABEL: commute_cmpps_one:
122; SSE:       # %bb.0:
123; SSE-NEXT:    movaps (%rdi), %xmm1
124; SSE-NEXT:    movaps %xmm1, %xmm2
125; SSE-NEXT:    cmpneqps %xmm0, %xmm2
126; SSE-NEXT:    cmpordps %xmm1, %xmm0
127; SSE-NEXT:    andps %xmm2, %xmm0
128; SSE-NEXT:    retq
129;
130; AVX-LABEL: commute_cmpps_one:
131; AVX:       # %bb.0:
132; AVX-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %xmm0
133; AVX-NEXT:    retq
134;
135; AVX512-LABEL: commute_cmpps_one:
136; AVX512:       # %bb.0:
137; AVX512-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %xmm0
138; AVX512-NEXT:    retq
139  %1 = load <4 x float>, <4 x float>* %a0
140  %2 = fcmp one <4 x float> %1, %a1
141  %3 = sext <4 x i1> %2 to <4 x i32>
142  ret <4 x i32> %3
143}
144
; fcmp olt on <4 x float> with the loaded vector as the first operand and the
; result sign-extended to <4 x i32>. All three check prefixes expect the load
; to stay a separate (v)movaps followed by a register-register (v)cmpltps,
; i.e. the load is not folded into the compare.
145define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
146; SSE-LABEL: commute_cmpps_lt:
147; SSE:       # %bb.0:
148; SSE-NEXT:    movaps (%rdi), %xmm1
149; SSE-NEXT:    cmpltps %xmm0, %xmm1
150; SSE-NEXT:    movaps %xmm1, %xmm0
151; SSE-NEXT:    retq
152;
153; AVX-LABEL: commute_cmpps_lt:
154; AVX:       # %bb.0:
155; AVX-NEXT:    vmovaps (%rdi), %xmm1
156; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
157; AVX-NEXT:    retq
158;
159; AVX512-LABEL: commute_cmpps_lt:
160; AVX512:       # %bb.0:
161; AVX512-NEXT:    vmovaps (%rdi), %xmm1
162; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
163; AVX512-NEXT:    retq
164  %1 = load <4 x float>, <4 x float>* %a0
165  %2 = fcmp olt <4 x float> %1, %a1
166  %3 = sext <4 x i1> %2 to <4 x i32>
167  ret <4 x i32> %3
168}
169
; fcmp ole on <4 x float> with the loaded vector as the first operand and the
; result sign-extended to <4 x i32>. All three check prefixes expect the load
; to stay a separate (v)movaps followed by a register-register (v)cmpleps,
; i.e. the load is not folded into the compare.
170define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
171; SSE-LABEL: commute_cmpps_le:
172; SSE:       # %bb.0:
173; SSE-NEXT:    movaps (%rdi), %xmm1
174; SSE-NEXT:    cmpleps %xmm0, %xmm1
175; SSE-NEXT:    movaps %xmm1, %xmm0
176; SSE-NEXT:    retq
177;
178; AVX-LABEL: commute_cmpps_le:
179; AVX:       # %bb.0:
180; AVX-NEXT:    vmovaps (%rdi), %xmm1
181; AVX-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
182; AVX-NEXT:    retq
183;
184; AVX512-LABEL: commute_cmpps_le:
185; AVX512:       # %bb.0:
186; AVX512-NEXT:    vmovaps (%rdi), %xmm1
187; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
188; AVX512-NEXT:    retq
189  %1 = load <4 x float>, <4 x float>* %a0
190  %2 = fcmp ole <4 x float> %1, %a1
191  %3 = sext <4 x i1> %2 to <4 x i32>
192  ret <4 x i32> %3
193}
194
; 256-bit variant: fcmp oeq on <8 x float> with the loaded vector as the first
; operand, sign-extended to <8 x i32>. The SSE checks expect two 128-bit
; cmpeqps with folded loads at (%rdi) and 16(%rdi); the AVX and AVX512 checks
; expect one ymm vcmpeqps with the load folded.
195define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
196; SSE-LABEL: commute_cmpps_eq_ymm:
197; SSE:       # %bb.0:
198; SSE-NEXT:    cmpeqps (%rdi), %xmm0
199; SSE-NEXT:    cmpeqps 16(%rdi), %xmm1
200; SSE-NEXT:    retq
201;
202; AVX-LABEL: commute_cmpps_eq_ymm:
203; AVX:       # %bb.0:
204; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
205; AVX-NEXT:    retq
206;
207; AVX512-LABEL: commute_cmpps_eq_ymm:
208; AVX512:       # %bb.0:
209; AVX512-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
210; AVX512-NEXT:    retq
211  %1 = load <8 x float>, <8 x float>* %a0
212  %2 = fcmp oeq <8 x float> %1, %a1
213  %3 = sext <8 x i1> %2 to <8 x i32>
214  ret <8 x i32> %3
215}
216
; 256-bit variant: fcmp une on <8 x float> with the loaded vector as the first
; operand, sign-extended to <8 x i32>. The SSE checks expect two 128-bit
; cmpneqps with folded loads at (%rdi) and 16(%rdi); the AVX and AVX512 checks
; expect one ymm vcmpneqps with the load folded.
217define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
218; SSE-LABEL: commute_cmpps_ne_ymm:
219; SSE:       # %bb.0:
220; SSE-NEXT:    cmpneqps (%rdi), %xmm0
221; SSE-NEXT:    cmpneqps 16(%rdi), %xmm1
222; SSE-NEXT:    retq
223;
224; AVX-LABEL: commute_cmpps_ne_ymm:
225; AVX:       # %bb.0:
226; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
227; AVX-NEXT:    retq
228;
229; AVX512-LABEL: commute_cmpps_ne_ymm:
230; AVX512:       # %bb.0:
231; AVX512-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
232; AVX512-NEXT:    retq
233  %1 = load <8 x float>, <8 x float>* %a0
234  %2 = fcmp une <8 x float> %1, %a1
235  %3 = sext <8 x i1> %2 to <8 x i32>
236  ret <8 x i32> %3
237}
238
; 256-bit variant: fcmp ord on <8 x float> with the loaded vector as the first
; operand, sign-extended to <8 x i32>. The SSE checks expect two 128-bit
; cmpordps with folded loads at (%rdi) and 16(%rdi); the AVX and AVX512 checks
; expect one ymm vcmpordps with the load folded.
239define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
240; SSE-LABEL: commute_cmpps_ord_ymm:
241; SSE:       # %bb.0:
242; SSE-NEXT:    cmpordps (%rdi), %xmm0
243; SSE-NEXT:    cmpordps 16(%rdi), %xmm1
244; SSE-NEXT:    retq
245;
246; AVX-LABEL: commute_cmpps_ord_ymm:
247; AVX:       # %bb.0:
248; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
249; AVX-NEXT:    retq
250;
251; AVX512-LABEL: commute_cmpps_ord_ymm:
252; AVX512:       # %bb.0:
253; AVX512-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
254; AVX512-NEXT:    retq
255  %1 = load <8 x float>, <8 x float>* %a0
256  %2 = fcmp ord <8 x float> %1, %a1
257  %3 = sext <8 x i1> %2 to <8 x i32>
258  ret <8 x i32> %3
259}
260
; 256-bit variant: fcmp uno on <8 x float> with the loaded vector as the first
; operand, sign-extended to <8 x i32>. The SSE checks expect two 128-bit
; cmpunordps with folded loads at (%rdi) and 16(%rdi); the AVX and AVX512
; checks expect one ymm vcmpunordps with the load folded.
261define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
262; SSE-LABEL: commute_cmpps_uno_ymm:
263; SSE:       # %bb.0:
264; SSE-NEXT:    cmpunordps (%rdi), %xmm0
265; SSE-NEXT:    cmpunordps 16(%rdi), %xmm1
266; SSE-NEXT:    retq
267;
268; AVX-LABEL: commute_cmpps_uno_ymm:
269; AVX:       # %bb.0:
270; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
271; AVX-NEXT:    retq
272;
273; AVX512-LABEL: commute_cmpps_uno_ymm:
274; AVX512:       # %bb.0:
275; AVX512-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
276; AVX512-NEXT:    retq
277  %1 = load <8 x float>, <8 x float>* %a0
278  %2 = fcmp uno <8 x float> %1, %a1
279  %3 = sext <8 x i1> %2 to <8 x i32>
280  ret <8 x i32> %3
281}
282
; 256-bit variant: fcmp ueq on <8 x float> with the loaded vector as the first
; operand, sign-extended to <8 x i32>. The SSE checks expect separate loads of
; both 128-bit halves, each followed by cmpeqps/cmpunordps combined with orps;
; the AVX and AVX512 checks expect one ymm vcmpeq_uqps with the load folded.
283define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
284; SSE-LABEL: commute_cmpps_ueq_ymm:
285; SSE:       # %bb.0:
286; SSE-NEXT:    movaps (%rdi), %xmm2
287; SSE-NEXT:    movaps 16(%rdi), %xmm3
288; SSE-NEXT:    movaps %xmm2, %xmm4
289; SSE-NEXT:    cmpeqps %xmm0, %xmm4
290; SSE-NEXT:    cmpunordps %xmm2, %xmm0
291; SSE-NEXT:    orps %xmm4, %xmm0
292; SSE-NEXT:    movaps %xmm3, %xmm2
293; SSE-NEXT:    cmpeqps %xmm1, %xmm2
294; SSE-NEXT:    cmpunordps %xmm3, %xmm1
295; SSE-NEXT:    orps %xmm2, %xmm1
296; SSE-NEXT:    retq
297;
298; AVX-LABEL: commute_cmpps_ueq_ymm:
299; AVX:       # %bb.0:
300; AVX-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
301; AVX-NEXT:    retq
302;
303; AVX512-LABEL: commute_cmpps_ueq_ymm:
304; AVX512:       # %bb.0:
305; AVX512-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
306; AVX512-NEXT:    retq
307  %1 = load <8 x float>, <8 x float>* %a0
308  %2 = fcmp ueq <8 x float> %1, %a1
309  %3 = sext <8 x i1> %2 to <8 x i32>
310  ret <8 x i32> %3
311}
312
; 256-bit variant: fcmp one on <8 x float> with the loaded vector as the first
; operand, sign-extended to <8 x i32>. The SSE checks expect separate loads of
; both 128-bit halves, each followed by cmpneqps/cmpordps combined with andps;
; the AVX and AVX512 checks expect one ymm vcmpneq_oqps with the load folded.
313define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
314; SSE-LABEL: commute_cmpps_one_ymm:
315; SSE:       # %bb.0:
316; SSE-NEXT:    movaps (%rdi), %xmm2
317; SSE-NEXT:    movaps 16(%rdi), %xmm3
318; SSE-NEXT:    movaps %xmm2, %xmm4
319; SSE-NEXT:    cmpneqps %xmm0, %xmm4
320; SSE-NEXT:    cmpordps %xmm2, %xmm0
321; SSE-NEXT:    andps %xmm4, %xmm0
322; SSE-NEXT:    movaps %xmm3, %xmm2
323; SSE-NEXT:    cmpneqps %xmm1, %xmm2
324; SSE-NEXT:    cmpordps %xmm3, %xmm1
325; SSE-NEXT:    andps %xmm2, %xmm1
326; SSE-NEXT:    retq
327;
328; AVX-LABEL: commute_cmpps_one_ymm:
329; AVX:       # %bb.0:
330; AVX-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
331; AVX-NEXT:    retq
332;
333; AVX512-LABEL: commute_cmpps_one_ymm:
334; AVX512:       # %bb.0:
335; AVX512-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
336; AVX512-NEXT:    retq
337  %1 = load <8 x float>, <8 x float>* %a0
338  %2 = fcmp one <8 x float> %1, %a1
339  %3 = sext <8 x i1> %2 to <8 x i32>
340  ret <8 x i32> %3
341}
342
; 256-bit variant: fcmp olt on <8 x float> with the loaded vector as the first
; operand, sign-extended to <8 x i32>. All three check prefixes expect the
; load to stay a separate (v)movaps followed by register-register (v)cmpltps
; (two 128-bit halves for SSE, one ymm op for AVX/AVX512).
343define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
344; SSE-LABEL: commute_cmpps_lt_ymm:
345; SSE:       # %bb.0:
346; SSE-NEXT:    movaps (%rdi), %xmm2
347; SSE-NEXT:    movaps 16(%rdi), %xmm3
348; SSE-NEXT:    cmpltps %xmm0, %xmm2
349; SSE-NEXT:    cmpltps %xmm1, %xmm3
350; SSE-NEXT:    movaps %xmm2, %xmm0
351; SSE-NEXT:    movaps %xmm3, %xmm1
352; SSE-NEXT:    retq
353;
354; AVX-LABEL: commute_cmpps_lt_ymm:
355; AVX:       # %bb.0:
356; AVX-NEXT:    vmovaps (%rdi), %ymm1
357; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
358; AVX-NEXT:    retq
359;
360; AVX512-LABEL: commute_cmpps_lt_ymm:
361; AVX512:       # %bb.0:
362; AVX512-NEXT:    vmovaps (%rdi), %ymm1
363; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
364; AVX512-NEXT:    retq
365  %1 = load <8 x float>, <8 x float>* %a0
366  %2 = fcmp olt <8 x float> %1, %a1
367  %3 = sext <8 x i1> %2 to <8 x i32>
368  ret <8 x i32> %3
369}
370
; 256-bit variant: fcmp ole on <8 x float> with the loaded vector as the first
; operand, sign-extended to <8 x i32>. All three check prefixes expect the
; load to stay a separate (v)movaps followed by register-register (v)cmpleps
; (two 128-bit halves for SSE, one ymm op for AVX/AVX512).
371define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
372; SSE-LABEL: commute_cmpps_le_ymm:
373; SSE:       # %bb.0:
374; SSE-NEXT:    movaps (%rdi), %xmm2
375; SSE-NEXT:    movaps 16(%rdi), %xmm3
376; SSE-NEXT:    cmpleps %xmm0, %xmm2
377; SSE-NEXT:    cmpleps %xmm1, %xmm3
378; SSE-NEXT:    movaps %xmm2, %xmm0
379; SSE-NEXT:    movaps %xmm3, %xmm1
380; SSE-NEXT:    retq
381;
382; AVX-LABEL: commute_cmpps_le_ymm:
383; AVX:       # %bb.0:
384; AVX-NEXT:    vmovaps (%rdi), %ymm1
385; AVX-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
386; AVX-NEXT:    retq
387;
388; AVX512-LABEL: commute_cmpps_le_ymm:
389; AVX512:       # %bb.0:
390; AVX512-NEXT:    vmovaps (%rdi), %ymm1
391; AVX512-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
392; AVX512-NEXT:    retq
393  %1 = load <8 x float>, <8 x float>* %a0
394  %2 = fcmp ole <8 x float> %1, %a1
395  %3 = sext <8 x i1> %2 to <8 x i32>
396  ret <8 x i32> %3
397}
398
399;
400; Double Comparisons
401; With SSE only equal/not-equal/ordered/unordered can be safely commuted (load folded); AVX/AVX512 also fold the load for ueq/one
402;
403
; fcmp oeq on <2 x double> with the loaded vector as the first operand and the
; result sign-extended to <2 x i64>. All three check prefixes expect a single
; (v)cmpeqpd with the load folded as its memory operand.
404define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
405; SSE-LABEL: commute_cmppd_eq:
406; SSE:       # %bb.0:
407; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
408; SSE-NEXT:    retq
409;
410; AVX-LABEL: commute_cmppd_eq:
411; AVX:       # %bb.0:
412; AVX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
413; AVX-NEXT:    retq
414;
415; AVX512-LABEL: commute_cmppd_eq:
416; AVX512:       # %bb.0:
417; AVX512-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
418; AVX512-NEXT:    retq
419  %1 = load <2 x double>, <2 x double>* %a0
420  %2 = fcmp oeq <2 x double> %1, %a1
421  %3 = sext <2 x i1> %2 to <2 x i64>
422  ret <2 x i64> %3
423}
424
; fcmp une on <2 x double> with the loaded vector as the first operand and the
; result sign-extended to <2 x i64>. All three check prefixes expect a single
; (v)cmpneqpd with the load folded as its memory operand.
425define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
426; SSE-LABEL: commute_cmppd_ne:
427; SSE:       # %bb.0:
428; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
429; SSE-NEXT:    retq
430;
431; AVX-LABEL: commute_cmppd_ne:
432; AVX:       # %bb.0:
433; AVX-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
434; AVX-NEXT:    retq
435;
436; AVX512-LABEL: commute_cmppd_ne:
437; AVX512:       # %bb.0:
438; AVX512-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
439; AVX512-NEXT:    retq
440  %1 = load <2 x double>, <2 x double>* %a0
441  %2 = fcmp une <2 x double> %1, %a1
442  %3 = sext <2 x i1> %2 to <2 x i64>
443  ret <2 x i64> %3
444}
445
; fcmp ord on <2 x double> with the loaded vector as the first operand and the
; result sign-extended to <2 x i64>. All three check prefixes expect a single
; (v)cmpordpd with the load folded as its memory operand.
446define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
447; SSE-LABEL: commute_cmppd_ord:
448; SSE:       # %bb.0:
449; SSE-NEXT:    cmpordpd (%rdi), %xmm0
450; SSE-NEXT:    retq
451;
452; AVX-LABEL: commute_cmppd_ord:
453; AVX:       # %bb.0:
454; AVX-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
455; AVX-NEXT:    retq
456;
457; AVX512-LABEL: commute_cmppd_ord:
458; AVX512:       # %bb.0:
459; AVX512-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
460; AVX512-NEXT:    retq
461  %1 = load <2 x double>, <2 x double>* %a0
462  %2 = fcmp ord <2 x double> %1, %a1
463  %3 = sext <2 x i1> %2 to <2 x i64>
464  ret <2 x i64> %3
465}
466
; fcmp ueq on <2 x double> with the loaded vector as the first operand and the
; result sign-extended to <2 x i64>. The SSE checks expect a separate load and
; a cmpeqpd/cmpunordpd pair combined with orpd; the AVX and AVX512 checks
; expect a single vcmpeq_uqpd with the load folded as its memory operand.
467define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
468; SSE-LABEL: commute_cmppd_ueq:
469; SSE:       # %bb.0:
470; SSE-NEXT:    movapd (%rdi), %xmm1
471; SSE-NEXT:    movapd %xmm1, %xmm2
472; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
473; SSE-NEXT:    cmpunordpd %xmm1, %xmm0
474; SSE-NEXT:    orpd %xmm2, %xmm0
475; SSE-NEXT:    retq
476;
477; AVX-LABEL: commute_cmppd_ueq:
478; AVX:       # %bb.0:
479; AVX-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %xmm0
480; AVX-NEXT:    retq
481;
482; AVX512-LABEL: commute_cmppd_ueq:
483; AVX512:       # %bb.0:
484; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %xmm0
485; AVX512-NEXT:    retq
486  %1 = load <2 x double>, <2 x double>* %a0
487  %2 = fcmp ueq <2 x double> %1, %a1
488  %3 = sext <2 x i1> %2 to <2 x i64>
489  ret <2 x i64> %3
490}
491
; fcmp one on <2 x double> with the loaded vector as the first operand and the
; result sign-extended to <2 x i64>. The SSE checks expect a separate load and
; a cmpneqpd/cmpordpd pair combined with andpd; the AVX and AVX512 checks
; expect a single vcmpneq_oqpd with the load folded as its memory operand.
492define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
493; SSE-LABEL: commute_cmppd_one:
494; SSE:       # %bb.0:
495; SSE-NEXT:    movapd (%rdi), %xmm1
496; SSE-NEXT:    movapd %xmm1, %xmm2
497; SSE-NEXT:    cmpneqpd %xmm0, %xmm2
498; SSE-NEXT:    cmpordpd %xmm1, %xmm0
499; SSE-NEXT:    andpd %xmm2, %xmm0
500; SSE-NEXT:    retq
501;
502; AVX-LABEL: commute_cmppd_one:
503; AVX:       # %bb.0:
504; AVX-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %xmm0
505; AVX-NEXT:    retq
506;
507; AVX512-LABEL: commute_cmppd_one:
508; AVX512:       # %bb.0:
509; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %xmm0
510; AVX512-NEXT:    retq
511  %1 = load <2 x double>, <2 x double>* %a0
512  %2 = fcmp one <2 x double> %1, %a1
513  %3 = sext <2 x i1> %2 to <2 x i64>
514  ret <2 x i64> %3
515}
516
; fcmp uno on <2 x double> with the loaded vector as the first operand and the
; result sign-extended to <2 x i64>. All three check prefixes expect a single
; (v)cmpunordpd with the load folded as its memory operand.
517define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
518; SSE-LABEL: commute_cmppd_uno:
519; SSE:       # %bb.0:
520; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
521; SSE-NEXT:    retq
522;
523; AVX-LABEL: commute_cmppd_uno:
524; AVX:       # %bb.0:
525; AVX-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
526; AVX-NEXT:    retq
527;
528; AVX512-LABEL: commute_cmppd_uno:
529; AVX512:       # %bb.0:
530; AVX512-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
531; AVX512-NEXT:    retq
532  %1 = load <2 x double>, <2 x double>* %a0
533  %2 = fcmp uno <2 x double> %1, %a1
534  %3 = sext <2 x i1> %2 to <2 x i64>
535  ret <2 x i64> %3
536}
537
; fcmp olt on <2 x double> with the loaded vector as the first operand and the
; result sign-extended to <2 x i64>. All three check prefixes expect the load
; to stay a separate (v)movapd followed by a register-register (v)cmpltpd,
; i.e. the load is not folded into the compare.
538define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
539; SSE-LABEL: commute_cmppd_lt:
540; SSE:       # %bb.0:
541; SSE-NEXT:    movapd (%rdi), %xmm1
542; SSE-NEXT:    cmpltpd %xmm0, %xmm1
543; SSE-NEXT:    movapd %xmm1, %xmm0
544; SSE-NEXT:    retq
545;
546; AVX-LABEL: commute_cmppd_lt:
547; AVX:       # %bb.0:
548; AVX-NEXT:    vmovapd (%rdi), %xmm1
549; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
550; AVX-NEXT:    retq
551;
552; AVX512-LABEL: commute_cmppd_lt:
553; AVX512:       # %bb.0:
554; AVX512-NEXT:    vmovapd (%rdi), %xmm1
555; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
556; AVX512-NEXT:    retq
557  %1 = load <2 x double>, <2 x double>* %a0
558  %2 = fcmp olt <2 x double> %1, %a1
559  %3 = sext <2 x i1> %2 to <2 x i64>
560  ret <2 x i64> %3
561}
562
; fcmp ole on <2 x double> with the loaded vector as the first operand and the
; result sign-extended to <2 x i64>. All three check prefixes expect the load
; to stay a separate (v)movapd followed by a register-register (v)cmplepd,
; i.e. the load is not folded into the compare.
563define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
564; SSE-LABEL: commute_cmppd_le:
565; SSE:       # %bb.0:
566; SSE-NEXT:    movapd (%rdi), %xmm1
567; SSE-NEXT:    cmplepd %xmm0, %xmm1
568; SSE-NEXT:    movapd %xmm1, %xmm0
569; SSE-NEXT:    retq
570;
571; AVX-LABEL: commute_cmppd_le:
572; AVX:       # %bb.0:
573; AVX-NEXT:    vmovapd (%rdi), %xmm1
574; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
575; AVX-NEXT:    retq
576;
577; AVX512-LABEL: commute_cmppd_le:
578; AVX512:       # %bb.0:
579; AVX512-NEXT:    vmovapd (%rdi), %xmm1
580; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
581; AVX512-NEXT:    retq
582  %1 = load <2 x double>, <2 x double>* %a0
583  %2 = fcmp ole <2 x double> %1, %a1
584  %3 = sext <2 x i1> %2 to <2 x i64>
585  ret <2 x i64> %3
586}
587
; 256-bit variant: fcmp oeq on <4 x double> with the loaded vector as the
; first operand, sign-extended to <4 x i64>. The SSE checks expect two 128-bit
; cmpeqpd with folded loads at (%rdi) and 16(%rdi); the AVX and AVX512 checks
; expect one ymm vcmpeqpd with the load folded.
588define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
589; SSE-LABEL: commute_cmppd_eq_ymmm:
590; SSE:       # %bb.0:
591; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
592; SSE-NEXT:    cmpeqpd 16(%rdi), %xmm1
593; SSE-NEXT:    retq
594;
595; AVX-LABEL: commute_cmppd_eq_ymmm:
596; AVX:       # %bb.0:
597; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
598; AVX-NEXT:    retq
599;
600; AVX512-LABEL: commute_cmppd_eq_ymmm:
601; AVX512:       # %bb.0:
602; AVX512-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
603; AVX512-NEXT:    retq
604  %1 = load <4 x double>, <4 x double>* %a0
605  %2 = fcmp oeq <4 x double> %1, %a1
606  %3 = sext <4 x i1> %2 to <4 x i64>
607  ret <4 x i64> %3
608}
609
; 256-bit variant: fcmp une on <4 x double> with the loaded vector as the
; first operand, sign-extended to <4 x i64>. The SSE checks expect two 128-bit
; cmpneqpd with folded loads at (%rdi) and 16(%rdi); the AVX and AVX512 checks
; expect one ymm vcmpneqpd with the load folded.
610define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) {
611; SSE-LABEL: commute_cmppd_ne_ymmm:
612; SSE:       # %bb.0:
613; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
614; SSE-NEXT:    cmpneqpd 16(%rdi), %xmm1
615; SSE-NEXT:    retq
616;
617; AVX-LABEL: commute_cmppd_ne_ymmm:
618; AVX:       # %bb.0:
619; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
620; AVX-NEXT:    retq
621;
622; AVX512-LABEL: commute_cmppd_ne_ymmm:
623; AVX512:       # %bb.0:
624; AVX512-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
625; AVX512-NEXT:    retq
626  %1 = load <4 x double>, <4 x double>* %a0
627  %2 = fcmp une <4 x double> %1, %a1
628  %3 = sext <4 x i1> %2 to <4 x i64>
629  ret <4 x i64> %3
630}
631
; 256-bit variant: fcmp ord on <4 x double> with the loaded vector as the
; first operand, sign-extended to <4 x i64>. The SSE checks expect two 128-bit
; cmpordpd with folded loads at (%rdi) and 16(%rdi); the AVX and AVX512 checks
; expect one ymm vcmpordpd with the load folded.
632define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) {
633; SSE-LABEL: commute_cmppd_ord_ymmm:
634; SSE:       # %bb.0:
635; SSE-NEXT:    cmpordpd (%rdi), %xmm0
636; SSE-NEXT:    cmpordpd 16(%rdi), %xmm1
637; SSE-NEXT:    retq
638;
639; AVX-LABEL: commute_cmppd_ord_ymmm:
640; AVX:       # %bb.0:
641; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
642; AVX-NEXT:    retq
643;
644; AVX512-LABEL: commute_cmppd_ord_ymmm:
645; AVX512:       # %bb.0:
646; AVX512-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
647; AVX512-NEXT:    retq
648  %1 = load <4 x double>, <4 x double>* %a0
649  %2 = fcmp ord <4 x double> %1, %a1
650  %3 = sext <4 x i1> %2 to <4 x i64>
651  ret <4 x i64> %3
652}
653
; 256-bit variant: fcmp uno on <4 x double> with the loaded vector as the
; first operand, sign-extended to <4 x i64>. The SSE checks expect two 128-bit
; cmpunordpd with folded loads at (%rdi) and 16(%rdi); the AVX and AVX512
; checks expect one ymm vcmpunordpd with the load folded.
654define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) {
655; SSE-LABEL: commute_cmppd_uno_ymmm:
656; SSE:       # %bb.0:
657; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
658; SSE-NEXT:    cmpunordpd 16(%rdi), %xmm1
659; SSE-NEXT:    retq
660;
661; AVX-LABEL: commute_cmppd_uno_ymmm:
662; AVX:       # %bb.0:
663; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
664; AVX-NEXT:    retq
665;
666; AVX512-LABEL: commute_cmppd_uno_ymmm:
667; AVX512:       # %bb.0:
668; AVX512-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
669; AVX512-NEXT:    retq
670  %1 = load <4 x double>, <4 x double>* %a0
671  %2 = fcmp uno <4 x double> %1, %a1
672  %3 = sext <4 x i1> %2 to <4 x i64>
673  ret <4 x i64> %3
674}
675
; 256-bit variant: fcmp ueq on <4 x double> with the loaded vector as the
; first operand, sign-extended to <4 x i64>. The SSE checks expect separate
; loads of both 128-bit halves, each followed by cmpeqpd/cmpunordpd combined
; with orpd; the AVX and AVX512 checks expect one ymm vcmpeq_uqpd with the
; load folded.
676define <4 x i64> @commute_cmppd_ueq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
677; SSE-LABEL: commute_cmppd_ueq_ymmm:
678; SSE:       # %bb.0:
679; SSE-NEXT:    movapd (%rdi), %xmm2
680; SSE-NEXT:    movapd 16(%rdi), %xmm3
681; SSE-NEXT:    movapd %xmm2, %xmm4
682; SSE-NEXT:    cmpeqpd %xmm0, %xmm4
683; SSE-NEXT:    cmpunordpd %xmm2, %xmm0
684; SSE-NEXT:    orpd %xmm4, %xmm0
685; SSE-NEXT:    movapd %xmm3, %xmm2
686; SSE-NEXT:    cmpeqpd %xmm1, %xmm2
687; SSE-NEXT:    cmpunordpd %xmm3, %xmm1
688; SSE-NEXT:    orpd %xmm2, %xmm1
689; SSE-NEXT:    retq
690;
691; AVX-LABEL: commute_cmppd_ueq_ymmm:
692; AVX:       # %bb.0:
693; AVX-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
694; AVX-NEXT:    retq
695;
696; AVX512-LABEL: commute_cmppd_ueq_ymmm:
697; AVX512:       # %bb.0:
698; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
699; AVX512-NEXT:    retq
700  %1 = load <4 x double>, <4 x double>* %a0
701  %2 = fcmp ueq <4 x double> %1, %a1
702  %3 = sext <4 x i1> %2 to <4 x i64>
703  ret <4 x i64> %3
704}
705
; 256-bit variant: fcmp one on <4 x double> with the loaded vector as the
; first operand, sign-extended to <4 x i64>. The SSE checks expect separate
; loads of both 128-bit halves, each followed by cmpneqpd/cmpordpd combined
; with andpd; the AVX and AVX512 checks expect one ymm vcmpneq_oqpd with the
; load folded.
706define <4 x i64> @commute_cmppd_one_ymmm(<4 x double>* %a0, <4 x double> %a1) {
707; SSE-LABEL: commute_cmppd_one_ymmm:
708; SSE:       # %bb.0:
709; SSE-NEXT:    movapd (%rdi), %xmm2
710; SSE-NEXT:    movapd 16(%rdi), %xmm3
711; SSE-NEXT:    movapd %xmm2, %xmm4
712; SSE-NEXT:    cmpneqpd %xmm0, %xmm4
713; SSE-NEXT:    cmpordpd %xmm2, %xmm0
714; SSE-NEXT:    andpd %xmm4, %xmm0
715; SSE-NEXT:    movapd %xmm3, %xmm2
716; SSE-NEXT:    cmpneqpd %xmm1, %xmm2
717; SSE-NEXT:    cmpordpd %xmm3, %xmm1
718; SSE-NEXT:    andpd %xmm2, %xmm1
719; SSE-NEXT:    retq
720;
721; AVX-LABEL: commute_cmppd_one_ymmm:
722; AVX:       # %bb.0:
723; AVX-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
724; AVX-NEXT:    retq
725;
726; AVX512-LABEL: commute_cmppd_one_ymmm:
727; AVX512:       # %bb.0:
728; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
729; AVX512-NEXT:    retq
730  %1 = load <4 x double>, <4 x double>* %a0
731  %2 = fcmp one <4 x double> %1, %a1
732  %3 = sext <4 x i1> %2 to <4 x i64>
733  ret <4 x i64> %3
734}
735
; 256-bit variant: fcmp olt on <4 x double> with the loaded vector as the
; first operand, sign-extended to <4 x i64>. All three check prefixes expect
; the load to stay a separate (v)movapd followed by register-register
; (v)cmpltpd (two 128-bit halves for SSE, one ymm op for AVX/AVX512).
736define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) {
737; SSE-LABEL: commute_cmppd_lt_ymmm:
738; SSE:       # %bb.0:
739; SSE-NEXT:    movapd (%rdi), %xmm2
740; SSE-NEXT:    movapd 16(%rdi), %xmm3
741; SSE-NEXT:    cmpltpd %xmm0, %xmm2
742; SSE-NEXT:    cmpltpd %xmm1, %xmm3
743; SSE-NEXT:    movapd %xmm2, %xmm0
744; SSE-NEXT:    movapd %xmm3, %xmm1
745; SSE-NEXT:    retq
746;
747; AVX-LABEL: commute_cmppd_lt_ymmm:
748; AVX:       # %bb.0:
749; AVX-NEXT:    vmovapd (%rdi), %ymm1
750; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
751; AVX-NEXT:    retq
752;
753; AVX512-LABEL: commute_cmppd_lt_ymmm:
754; AVX512:       # %bb.0:
755; AVX512-NEXT:    vmovapd (%rdi), %ymm1
756; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
757; AVX512-NEXT:    retq
758  %1 = load <4 x double>, <4 x double>* %a0
759  %2 = fcmp olt <4 x double> %1, %a1
760  %3 = sext <4 x i1> %2 to <4 x i64>
761  ret <4 x i64> %3
762}
763
; 256-bit variant: fcmp ole on <4 x double> with the loaded vector as the
; first operand, sign-extended to <4 x i64>. All three check prefixes expect
; the load to stay a separate (v)movapd followed by register-register
; (v)cmplepd (two 128-bit halves for SSE, one ymm op for AVX/AVX512).
764define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) {
765; SSE-LABEL: commute_cmppd_le_ymmm:
766; SSE:       # %bb.0:
767; SSE-NEXT:    movapd (%rdi), %xmm2
768; SSE-NEXT:    movapd 16(%rdi), %xmm3
769; SSE-NEXT:    cmplepd %xmm0, %xmm2
770; SSE-NEXT:    cmplepd %xmm1, %xmm3
771; SSE-NEXT:    movapd %xmm2, %xmm0
772; SSE-NEXT:    movapd %xmm3, %xmm1
773; SSE-NEXT:    retq
774;
775; AVX-LABEL: commute_cmppd_le_ymmm:
776; AVX:       # %bb.0:
777; AVX-NEXT:    vmovapd (%rdi), %ymm1
778; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
779; AVX-NEXT:    retq
780;
781; AVX512-LABEL: commute_cmppd_le_ymmm:
782; AVX512:       # %bb.0:
783; AVX512-NEXT:    vmovapd (%rdi), %ymm1
784; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
785; AVX512-NEXT:    retq
786  %1 = load <4 x double>, <4 x double>* %a0
787  %2 = fcmp ole <4 x double> %1, %a1
788  %3 = sext <4 x i1> %2 to <4 x i64>
789  ret <4 x i64> %3
790}
791
; 512-bit float variant: fcmp oeq on <16 x float> with the loaded vector as
; the first operand, sign-extended to <16 x i32>. The SSE checks expect four
; 128-bit cmpeqps and the AVX checks two 256-bit vcmpeqps, all with folded
; loads; the AVX512 checks expect a zmm vcmpeqps with the load folded that
; writes mask %k0, followed by vpmovm2d to expand the mask into %zmm0.
792define <16 x i32> @commute_cmpps_eq_zmm(<16 x float>* %a0, <16 x float> %a1) {
793; SSE-LABEL: commute_cmpps_eq_zmm:
794; SSE:       # %bb.0:
795; SSE-NEXT:    cmpeqps (%rdi), %xmm0
796; SSE-NEXT:    cmpeqps 16(%rdi), %xmm1
797; SSE-NEXT:    cmpeqps 32(%rdi), %xmm2
798; SSE-NEXT:    cmpeqps 48(%rdi), %xmm3
799; SSE-NEXT:    retq
800;
801; AVX-LABEL: commute_cmpps_eq_zmm:
802; AVX:       # %bb.0:
803; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
804; AVX-NEXT:    vcmpeqps 32(%rdi), %ymm1, %ymm1
805; AVX-NEXT:    retq
806;
807; AVX512-LABEL: commute_cmpps_eq_zmm:
808; AVX512:       # %bb.0:
809; AVX512-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
810; AVX512-NEXT:    vpmovm2d %k0, %zmm0
811; AVX512-NEXT:    retq
812  %1 = load <16 x float>, <16 x float>* %a0
813  %2 = fcmp oeq <16 x float> %1, %a1
814  %3 = sext <16 x i1> %2 to <16 x i32>
815  ret <16 x i32> %3
816}
817
; 512-bit float variant: fcmp une on <16 x float> with the loaded vector as
; the first operand, sign-extended to <16 x i32>. The SSE checks expect four
; 128-bit cmpneqps and the AVX checks two 256-bit vcmpneqps, all with folded
; loads; the AVX512 checks expect a zmm vcmpneqps with the load folded that
; writes mask %k0, followed by vpmovm2d to expand the mask into %zmm0.
818define <16 x i32> @commute_cmpps_ne_zmm(<16 x float>* %a0, <16 x float> %a1) {
819; SSE-LABEL: commute_cmpps_ne_zmm:
820; SSE:       # %bb.0:
821; SSE-NEXT:    cmpneqps (%rdi), %xmm0
822; SSE-NEXT:    cmpneqps 16(%rdi), %xmm1
823; SSE-NEXT:    cmpneqps 32(%rdi), %xmm2
824; SSE-NEXT:    cmpneqps 48(%rdi), %xmm3
825; SSE-NEXT:    retq
826;
827; AVX-LABEL: commute_cmpps_ne_zmm:
828; AVX:       # %bb.0:
829; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
830; AVX-NEXT:    vcmpneqps 32(%rdi), %ymm1, %ymm1
831; AVX-NEXT:    retq
832;
833; AVX512-LABEL: commute_cmpps_ne_zmm:
834; AVX512:       # %bb.0:
835; AVX512-NEXT:    vcmpneqps (%rdi), %zmm0, %k0
836; AVX512-NEXT:    vpmovm2d %k0, %zmm0
837; AVX512-NEXT:    retq
838  %1 = load <16 x float>, <16 x float>* %a0
839  %2 = fcmp une <16 x float> %1, %a1
840  %3 = sext <16 x i1> %2 to <16 x i32>
841  ret <16 x i32> %3
842}
843
; 512-bit float variant: fcmp ord on <16 x float> with the loaded vector as
; the first operand, sign-extended to <16 x i32>. The SSE checks expect four
; 128-bit cmpordps and the AVX checks two 256-bit vcmpordps, all with folded
; loads; the AVX512 checks expect a zmm vcmpordps with the load folded that
; writes mask %k0, followed by vpmovm2d to expand the mask into %zmm0.
844define <16 x i32> @commute_cmpps_ord_zmm(<16 x float>* %a0, <16 x float> %a1) {
845; SSE-LABEL: commute_cmpps_ord_zmm:
846; SSE:       # %bb.0:
847; SSE-NEXT:    cmpordps (%rdi), %xmm0
848; SSE-NEXT:    cmpordps 16(%rdi), %xmm1
849; SSE-NEXT:    cmpordps 32(%rdi), %xmm2
850; SSE-NEXT:    cmpordps 48(%rdi), %xmm3
851; SSE-NEXT:    retq
852;
853; AVX-LABEL: commute_cmpps_ord_zmm:
854; AVX:       # %bb.0:
855; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
856; AVX-NEXT:    vcmpordps 32(%rdi), %ymm1, %ymm1
857; AVX-NEXT:    retq
858;
859; AVX512-LABEL: commute_cmpps_ord_zmm:
860; AVX512:       # %bb.0:
861; AVX512-NEXT:    vcmpordps (%rdi), %zmm0, %k0
862; AVX512-NEXT:    vpmovm2d %k0, %zmm0
863; AVX512-NEXT:    retq
864  %1 = load <16 x float>, <16 x float>* %a0
865  %2 = fcmp ord <16 x float> %1, %a1
866  %3 = sext <16 x i1> %2 to <16 x i32>
867  ret <16 x i32> %3
868}
869
; 512-bit float variant: fcmp uno on <16 x float> with the loaded vector as
; the first operand, sign-extended to <16 x i32>. The SSE checks expect four
; 128-bit cmpunordps and the AVX checks two 256-bit vcmpunordps, all with
; folded loads; the AVX512 checks expect a zmm vcmpunordps with the load
; folded that writes mask %k0, followed by vpmovm2d to expand it into %zmm0.
870define <16 x i32> @commute_cmpps_uno_zmm(<16 x float>* %a0, <16 x float> %a1) {
871; SSE-LABEL: commute_cmpps_uno_zmm:
872; SSE:       # %bb.0:
873; SSE-NEXT:    cmpunordps (%rdi), %xmm0
874; SSE-NEXT:    cmpunordps 16(%rdi), %xmm1
875; SSE-NEXT:    cmpunordps 32(%rdi), %xmm2
876; SSE-NEXT:    cmpunordps 48(%rdi), %xmm3
877; SSE-NEXT:    retq
878;
879; AVX-LABEL: commute_cmpps_uno_zmm:
880; AVX:       # %bb.0:
881; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
882; AVX-NEXT:    vcmpunordps 32(%rdi), %ymm1, %ymm1
883; AVX-NEXT:    retq
884;
885; AVX512-LABEL: commute_cmpps_uno_zmm:
886; AVX512:       # %bb.0:
887; AVX512-NEXT:    vcmpunordps (%rdi), %zmm0, %k0
888; AVX512-NEXT:    vpmovm2d %k0, %zmm0
889; AVX512-NEXT:    retq
890  %1 = load <16 x float>, <16 x float>* %a0
891  %2 = fcmp uno <16 x float> %1, %a1
892  %3 = sext <16 x i1> %2 to <16 x i32>
893  ret <16 x i32> %3
894}
895
; 512-bit float variant: fcmp ueq on <16 x float> with the loaded vector as
; the first operand, sign-extended to <16 x i32>. The SSE checks expect four
; separate 128-bit loads, each followed by cmpeqps/cmpunordps combined with
; orps; the AVX checks expect two 256-bit vcmpeq_uqps with folded loads; the
; AVX512 checks expect a zmm vcmpeq_uqps with the load folded that writes
; mask %k0, followed by vpmovm2d to expand the mask into %zmm0.
896define <16 x i32> @commute_cmpps_ueq_zmm(<16 x float>* %a0, <16 x float> %a1) {
897; SSE-LABEL: commute_cmpps_ueq_zmm:
898; SSE:       # %bb.0:
899; SSE-NEXT:    movaps (%rdi), %xmm7
900; SSE-NEXT:    movaps 16(%rdi), %xmm5
901; SSE-NEXT:    movaps 32(%rdi), %xmm6
902; SSE-NEXT:    movaps 48(%rdi), %xmm8
903; SSE-NEXT:    movaps %xmm7, %xmm4
904; SSE-NEXT:    cmpeqps %xmm0, %xmm4
905; SSE-NEXT:    cmpunordps %xmm7, %xmm0
906; SSE-NEXT:    orps %xmm4, %xmm0
907; SSE-NEXT:    movaps %xmm5, %xmm4
908; SSE-NEXT:    cmpeqps %xmm1, %xmm4
909; SSE-NEXT:    cmpunordps %xmm5, %xmm1
910; SSE-NEXT:    orps %xmm4, %xmm1
911; SSE-NEXT:    movaps %xmm6, %xmm4
912; SSE-NEXT:    cmpeqps %xmm2, %xmm4
913; SSE-NEXT:    cmpunordps %xmm6, %xmm2
914; SSE-NEXT:    orps %xmm4, %xmm2
915; SSE-NEXT:    movaps %xmm8, %xmm4
916; SSE-NEXT:    cmpeqps %xmm3, %xmm4
917; SSE-NEXT:    cmpunordps %xmm8, %xmm3
918; SSE-NEXT:    orps %xmm4, %xmm3
919; SSE-NEXT:    retq
920;
921; AVX-LABEL: commute_cmpps_ueq_zmm:
922; AVX:       # %bb.0:
923; AVX-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
924; AVX-NEXT:    vcmpeq_uqps 32(%rdi), %ymm1, %ymm1
925; AVX-NEXT:    retq
926;
927; AVX512-LABEL: commute_cmpps_ueq_zmm:
928; AVX512:       # %bb.0:
929; AVX512-NEXT:    vcmpeq_uqps (%rdi), %zmm0, %k0
930; AVX512-NEXT:    vpmovm2d %k0, %zmm0
931; AVX512-NEXT:    retq
932  %1 = load <16 x float>, <16 x float>* %a0
933  %2 = fcmp ueq <16 x float> %1, %a1
934  %3 = sext <16 x i1> %2 to <16 x i32>
935  ret <16 x i32> %3
936}
937
; 512-bit float variant: fcmp one on <16 x float> with the loaded vector as
; the first operand, sign-extended to <16 x i32>. The SSE checks expect four
; separate 128-bit loads, each followed by cmpneqps/cmpordps combined with
; andps; the AVX checks expect two 256-bit vcmpneq_oqps with folded loads; the
; AVX512 checks expect a zmm vcmpneq_oqps with the load folded that writes
; mask %k0, followed by vpmovm2d to expand the mask into %zmm0.
938define <16 x i32> @commute_cmpps_one_zmm(<16 x float>* %a0, <16 x float> %a1) {
939; SSE-LABEL: commute_cmpps_one_zmm:
940; SSE:       # %bb.0:
941; SSE-NEXT:    movaps (%rdi), %xmm7
942; SSE-NEXT:    movaps 16(%rdi), %xmm5
943; SSE-NEXT:    movaps 32(%rdi), %xmm6
944; SSE-NEXT:    movaps 48(%rdi), %xmm8
945; SSE-NEXT:    movaps %xmm7, %xmm4
946; SSE-NEXT:    cmpneqps %xmm0, %xmm4
947; SSE-NEXT:    cmpordps %xmm7, %xmm0
948; SSE-NEXT:    andps %xmm4, %xmm0
949; SSE-NEXT:    movaps %xmm5, %xmm4
950; SSE-NEXT:    cmpneqps %xmm1, %xmm4
951; SSE-NEXT:    cmpordps %xmm5, %xmm1
952; SSE-NEXT:    andps %xmm4, %xmm1
953; SSE-NEXT:    movaps %xmm6, %xmm4
954; SSE-NEXT:    cmpneqps %xmm2, %xmm4
955; SSE-NEXT:    cmpordps %xmm6, %xmm2
956; SSE-NEXT:    andps %xmm4, %xmm2
957; SSE-NEXT:    movaps %xmm8, %xmm4
958; SSE-NEXT:    cmpneqps %xmm3, %xmm4
959; SSE-NEXT:    cmpordps %xmm8, %xmm3
960; SSE-NEXT:    andps %xmm4, %xmm3
961; SSE-NEXT:    retq
962;
963; AVX-LABEL: commute_cmpps_one_zmm:
964; AVX:       # %bb.0:
965; AVX-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
966; AVX-NEXT:    vcmpneq_oqps 32(%rdi), %ymm1, %ymm1
967; AVX-NEXT:    retq
968;
969; AVX512-LABEL: commute_cmpps_one_zmm:
970; AVX512:       # %bb.0:
971; AVX512-NEXT:    vcmpneq_oqps (%rdi), %zmm0, %k0
972; AVX512-NEXT:    vpmovm2d %k0, %zmm0
973; AVX512-NEXT:    retq
974  %1 = load <16 x float>, <16 x float>* %a0
975  %2 = fcmp one <16 x float> %1, %a1
976  %3 = sext <16 x i1> %2 to <16 x i32>
977  ret <16 x i32> %3
978}
979
; 512-bit 'fcmp olt' on <16 x float> with the loaded value as the LHS operand.
; SSE and AVX checks: the load stays a separate movaps/vmovaps and feeds
; cmpltps/vcmpltps as a register operand (SSE additionally copies the results
; back into %xmm0-%xmm3). AVX512 checks: the load is folded as the memory
; operand of vcmpgtps, i.e. the compare is emitted with the 'gt' predicate
; rather than 'lt', and the %k0 mask is expanded with vpmovm2d.
980define <16 x i32> @commute_cmpps_lt_zmm(<16 x float>* %a0, <16 x float> %a1) {
981; SSE-LABEL: commute_cmpps_lt_zmm:
982; SSE:       # %bb.0:
983; SSE-NEXT:    movaps (%rdi), %xmm4
984; SSE-NEXT:    movaps 16(%rdi), %xmm5
985; SSE-NEXT:    movaps 32(%rdi), %xmm6
986; SSE-NEXT:    movaps 48(%rdi), %xmm7
987; SSE-NEXT:    cmpltps %xmm0, %xmm4
988; SSE-NEXT:    cmpltps %xmm1, %xmm5
989; SSE-NEXT:    cmpltps %xmm2, %xmm6
990; SSE-NEXT:    cmpltps %xmm3, %xmm7
991; SSE-NEXT:    movaps %xmm4, %xmm0
992; SSE-NEXT:    movaps %xmm5, %xmm1
993; SSE-NEXT:    movaps %xmm6, %xmm2
994; SSE-NEXT:    movaps %xmm7, %xmm3
995; SSE-NEXT:    retq
996;
997; AVX-LABEL: commute_cmpps_lt_zmm:
998; AVX:       # %bb.0:
999; AVX-NEXT:    vmovaps (%rdi), %ymm2
1000; AVX-NEXT:    vmovaps 32(%rdi), %ymm3
1001; AVX-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
1002; AVX-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
1003; AVX-NEXT:    retq
1004;
1005; AVX512-LABEL: commute_cmpps_lt_zmm:
1006; AVX512:       # %bb.0:
1007; AVX512-NEXT:    vcmpgtps (%rdi), %zmm0, %k0
1008; AVX512-NEXT:    vpmovm2d %k0, %zmm0
1009; AVX512-NEXT:    retq
1010  %1 = load <16 x float>, <16 x float>* %a0
1011  %2 = fcmp olt <16 x float> %1, %a1
1012  %3 = sext <16 x i1> %2 to <16 x i32>
1013  ret <16 x i32> %3
1014}
1015
; 512-bit 'fcmp ole' on <16 x float> with the loaded value as the LHS operand.
; SSE and AVX checks: the load stays a separate movaps/vmovaps and feeds
; cmpleps/vcmpleps as a register operand (SSE additionally copies the results
; back into %xmm0-%xmm3). AVX512 checks: the load is folded as the memory
; operand of vcmpgeps, i.e. the compare is emitted with the 'ge' predicate
; rather than 'le', and the %k0 mask is expanded with vpmovm2d.
1016define <16 x i32> @commute_cmpps_le_zmm(<16 x float>* %a0, <16 x float> %a1) {
1017; SSE-LABEL: commute_cmpps_le_zmm:
1018; SSE:       # %bb.0:
1019; SSE-NEXT:    movaps (%rdi), %xmm4
1020; SSE-NEXT:    movaps 16(%rdi), %xmm5
1021; SSE-NEXT:    movaps 32(%rdi), %xmm6
1022; SSE-NEXT:    movaps 48(%rdi), %xmm7
1023; SSE-NEXT:    cmpleps %xmm0, %xmm4
1024; SSE-NEXT:    cmpleps %xmm1, %xmm5
1025; SSE-NEXT:    cmpleps %xmm2, %xmm6
1026; SSE-NEXT:    cmpleps %xmm3, %xmm7
1027; SSE-NEXT:    movaps %xmm4, %xmm0
1028; SSE-NEXT:    movaps %xmm5, %xmm1
1029; SSE-NEXT:    movaps %xmm6, %xmm2
1030; SSE-NEXT:    movaps %xmm7, %xmm3
1031; SSE-NEXT:    retq
1032;
1033; AVX-LABEL: commute_cmpps_le_zmm:
1034; AVX:       # %bb.0:
1035; AVX-NEXT:    vmovaps (%rdi), %ymm2
1036; AVX-NEXT:    vmovaps 32(%rdi), %ymm3
1037; AVX-NEXT:    vcmpleps %ymm0, %ymm2, %ymm0
1038; AVX-NEXT:    vcmpleps %ymm1, %ymm3, %ymm1
1039; AVX-NEXT:    retq
1040;
1041; AVX512-LABEL: commute_cmpps_le_zmm:
1042; AVX512:       # %bb.0:
1043; AVX512-NEXT:    vcmpgeps (%rdi), %zmm0, %k0
1044; AVX512-NEXT:    vpmovm2d %k0, %zmm0
1045; AVX512-NEXT:    retq
1046  %1 = load <16 x float>, <16 x float>* %a0
1047  %2 = fcmp ole <16 x float> %1, %a1
1048  %3 = sext <16 x i1> %2 to <16 x i32>
1049  ret <16 x i32> %3
1050}
1051
; 512-bit 'fcmp oeq' on <8 x double> with the loaded value as the LHS operand.
; All three check prefixes expect the load folded straight into the compare as
; its memory operand: four cmpeqpd (SSE), two vcmpeqpd (AVX), or one vcmpeqpd
; into mask %k0 followed by vpmovm2q (AVX512).
; NOTE(review): the '_zmmm' suffix (three m's) differs from the '_zmm' suffix
; used by the commute_cmpps_*_zmm tests in this file; it looks like a typo, but
; the autogenerated -LABEL lines depend on the name, so it is left as-is.
1052define <8 x i64> @commute_cmppd_eq_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1053; SSE-LABEL: commute_cmppd_eq_zmmm:
1054; SSE:       # %bb.0:
1055; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
1056; SSE-NEXT:    cmpeqpd 16(%rdi), %xmm1
1057; SSE-NEXT:    cmpeqpd 32(%rdi), %xmm2
1058; SSE-NEXT:    cmpeqpd 48(%rdi), %xmm3
1059; SSE-NEXT:    retq
1060;
1061; AVX-LABEL: commute_cmppd_eq_zmmm:
1062; AVX:       # %bb.0:
1063; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
1064; AVX-NEXT:    vcmpeqpd 32(%rdi), %ymm1, %ymm1
1065; AVX-NEXT:    retq
1066;
1067; AVX512-LABEL: commute_cmppd_eq_zmmm:
1068; AVX512:       # %bb.0:
1069; AVX512-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
1070; AVX512-NEXT:    vpmovm2q %k0, %zmm0
1071; AVX512-NEXT:    retq
1072  %1 = load <8 x double>, <8 x double>* %a0
1073  %2 = fcmp oeq <8 x double> %1, %a1
1074  %3 = sext <8 x i1> %2 to <8 x i64>
1075  ret <8 x i64> %3
1076}
1077
; 512-bit 'fcmp une' on <8 x double> with the loaded value as the LHS operand.
; All three check prefixes expect the load folded straight into the compare as
; its memory operand: four cmpneqpd (SSE), two vcmpneqpd (AVX), or one
; vcmpneqpd into mask %k0 followed by vpmovm2q (AVX512).
1078define <8 x i64> @commute_cmppd_ne_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1079; SSE-LABEL: commute_cmppd_ne_zmmm:
1080; SSE:       # %bb.0:
1081; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
1082; SSE-NEXT:    cmpneqpd 16(%rdi), %xmm1
1083; SSE-NEXT:    cmpneqpd 32(%rdi), %xmm2
1084; SSE-NEXT:    cmpneqpd 48(%rdi), %xmm3
1085; SSE-NEXT:    retq
1086;
1087; AVX-LABEL: commute_cmppd_ne_zmmm:
1088; AVX:       # %bb.0:
1089; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
1090; AVX-NEXT:    vcmpneqpd 32(%rdi), %ymm1, %ymm1
1091; AVX-NEXT:    retq
1092;
1093; AVX512-LABEL: commute_cmppd_ne_zmmm:
1094; AVX512:       # %bb.0:
1095; AVX512-NEXT:    vcmpneqpd (%rdi), %zmm0, %k0
1096; AVX512-NEXT:    vpmovm2q %k0, %zmm0
1097; AVX512-NEXT:    retq
1098  %1 = load <8 x double>, <8 x double>* %a0
1099  %2 = fcmp une <8 x double> %1, %a1
1100  %3 = sext <8 x i1> %2 to <8 x i64>
1101  ret <8 x i64> %3
1102}
1103
; 512-bit 'fcmp ord' on <8 x double> with the loaded value as the LHS operand.
; All three check prefixes expect the load folded straight into the compare as
; its memory operand: four cmpordpd (SSE), two vcmpordpd (AVX), or one
; vcmpordpd into mask %k0 followed by vpmovm2q (AVX512).
1104define <8 x i64> @commute_cmppd_ord_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1105; SSE-LABEL: commute_cmppd_ord_zmmm:
1106; SSE:       # %bb.0:
1107; SSE-NEXT:    cmpordpd (%rdi), %xmm0
1108; SSE-NEXT:    cmpordpd 16(%rdi), %xmm1
1109; SSE-NEXT:    cmpordpd 32(%rdi), %xmm2
1110; SSE-NEXT:    cmpordpd 48(%rdi), %xmm3
1111; SSE-NEXT:    retq
1112;
1113; AVX-LABEL: commute_cmppd_ord_zmmm:
1114; AVX:       # %bb.0:
1115; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
1116; AVX-NEXT:    vcmpordpd 32(%rdi), %ymm1, %ymm1
1117; AVX-NEXT:    retq
1118;
1119; AVX512-LABEL: commute_cmppd_ord_zmmm:
1120; AVX512:       # %bb.0:
1121; AVX512-NEXT:    vcmpordpd (%rdi), %zmm0, %k0
1122; AVX512-NEXT:    vpmovm2q %k0, %zmm0
1123; AVX512-NEXT:    retq
1124  %1 = load <8 x double>, <8 x double>* %a0
1125  %2 = fcmp ord <8 x double> %1, %a1
1126  %3 = sext <8 x i1> %2 to <8 x i64>
1127  ret <8 x i64> %3
1128}
1129
; 512-bit 'fcmp uno' on <8 x double> with the loaded value as the LHS operand.
; All three check prefixes expect the load folded straight into the compare as
; its memory operand: four cmpunordpd (SSE), two vcmpunordpd (AVX), or one
; vcmpunordpd into mask %k0 followed by vpmovm2q (AVX512).
1130define <8 x i64> @commute_cmppd_uno_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1131; SSE-LABEL: commute_cmppd_uno_zmmm:
1132; SSE:       # %bb.0:
1133; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
1134; SSE-NEXT:    cmpunordpd 16(%rdi), %xmm1
1135; SSE-NEXT:    cmpunordpd 32(%rdi), %xmm2
1136; SSE-NEXT:    cmpunordpd 48(%rdi), %xmm3
1137; SSE-NEXT:    retq
1138;
1139; AVX-LABEL: commute_cmppd_uno_zmmm:
1140; AVX:       # %bb.0:
1141; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
1142; AVX-NEXT:    vcmpunordpd 32(%rdi), %ymm1, %ymm1
1143; AVX-NEXT:    retq
1144;
1145; AVX512-LABEL: commute_cmppd_uno_zmmm:
1146; AVX512:       # %bb.0:
1147; AVX512-NEXT:    vcmpunordpd (%rdi), %zmm0, %k0
1148; AVX512-NEXT:    vpmovm2q %k0, %zmm0
1149; AVX512-NEXT:    retq
1150  %1 = load <8 x double>, <8 x double>* %a0
1151  %2 = fcmp uno <8 x double> %1, %a1
1152  %3 = sext <8 x i1> %2 to <8 x i64>
1153  ret <8 x i64> %3
1154}
1155
; 512-bit 'fcmp ueq' on <8 x double> with the loaded value as the LHS operand.
; SSE checks: four explicit movapd loads, then cmpeqpd + cmpunordpd + orpd for
; each 128-bit quarter. AVX checks: the load is folded into vcmpeq_uqpd as the
; memory operand (two 256-bit halves). AVX512 checks: a single folded
; vcmpeq_uqpd writing mask %k0, expanded to a vector with vpmovm2q.
1156define <8 x i64> @commute_cmppd_ueq_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1157; SSE-LABEL: commute_cmppd_ueq_zmmm:
1158; SSE:       # %bb.0:
1159; SSE-NEXT:    movapd (%rdi), %xmm7
1160; SSE-NEXT:    movapd 16(%rdi), %xmm5
1161; SSE-NEXT:    movapd 32(%rdi), %xmm6
1162; SSE-NEXT:    movapd 48(%rdi), %xmm8
1163; SSE-NEXT:    movapd %xmm7, %xmm4
1164; SSE-NEXT:    cmpeqpd %xmm0, %xmm4
1165; SSE-NEXT:    cmpunordpd %xmm7, %xmm0
1166; SSE-NEXT:    orpd %xmm4, %xmm0
1167; SSE-NEXT:    movapd %xmm5, %xmm4
1168; SSE-NEXT:    cmpeqpd %xmm1, %xmm4
1169; SSE-NEXT:    cmpunordpd %xmm5, %xmm1
1170; SSE-NEXT:    orpd %xmm4, %xmm1
1171; SSE-NEXT:    movapd %xmm6, %xmm4
1172; SSE-NEXT:    cmpeqpd %xmm2, %xmm4
1173; SSE-NEXT:    cmpunordpd %xmm6, %xmm2
1174; SSE-NEXT:    orpd %xmm4, %xmm2
1175; SSE-NEXT:    movapd %xmm8, %xmm4
1176; SSE-NEXT:    cmpeqpd %xmm3, %xmm4
1177; SSE-NEXT:    cmpunordpd %xmm8, %xmm3
1178; SSE-NEXT:    orpd %xmm4, %xmm3
1179; SSE-NEXT:    retq
1180;
1181; AVX-LABEL: commute_cmppd_ueq_zmmm:
1182; AVX:       # %bb.0:
1183; AVX-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
1184; AVX-NEXT:    vcmpeq_uqpd 32(%rdi), %ymm1, %ymm1
1185; AVX-NEXT:    retq
1186;
1187; AVX512-LABEL: commute_cmppd_ueq_zmmm:
1188; AVX512:       # %bb.0:
1189; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %zmm0, %k0
1190; AVX512-NEXT:    vpmovm2q %k0, %zmm0
1191; AVX512-NEXT:    retq
1192  %1 = load <8 x double>, <8 x double>* %a0
1193  %2 = fcmp ueq <8 x double> %1, %a1
1194  %3 = sext <8 x i1> %2 to <8 x i64>
1195  ret <8 x i64> %3
1196}
1197
; 512-bit 'fcmp one' on <8 x double> with the loaded value as the LHS operand.
; SSE checks: four explicit movapd loads, then cmpneqpd + cmpordpd + andpd for
; each 128-bit quarter. AVX checks: the load is folded into vcmpneq_oqpd as the
; memory operand (two 256-bit halves). AVX512 checks: a single folded
; vcmpneq_oqpd writing mask %k0, expanded to a vector with vpmovm2q.
1198define <8 x i64> @commute_cmppd_one_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1199; SSE-LABEL: commute_cmppd_one_zmmm:
1200; SSE:       # %bb.0:
1201; SSE-NEXT:    movapd (%rdi), %xmm7
1202; SSE-NEXT:    movapd 16(%rdi), %xmm5
1203; SSE-NEXT:    movapd 32(%rdi), %xmm6
1204; SSE-NEXT:    movapd 48(%rdi), %xmm8
1205; SSE-NEXT:    movapd %xmm7, %xmm4
1206; SSE-NEXT:    cmpneqpd %xmm0, %xmm4
1207; SSE-NEXT:    cmpordpd %xmm7, %xmm0
1208; SSE-NEXT:    andpd %xmm4, %xmm0
1209; SSE-NEXT:    movapd %xmm5, %xmm4
1210; SSE-NEXT:    cmpneqpd %xmm1, %xmm4
1211; SSE-NEXT:    cmpordpd %xmm5, %xmm1
1212; SSE-NEXT:    andpd %xmm4, %xmm1
1213; SSE-NEXT:    movapd %xmm6, %xmm4
1214; SSE-NEXT:    cmpneqpd %xmm2, %xmm4
1215; SSE-NEXT:    cmpordpd %xmm6, %xmm2
1216; SSE-NEXT:    andpd %xmm4, %xmm2
1217; SSE-NEXT:    movapd %xmm8, %xmm4
1218; SSE-NEXT:    cmpneqpd %xmm3, %xmm4
1219; SSE-NEXT:    cmpordpd %xmm8, %xmm3
1220; SSE-NEXT:    andpd %xmm4, %xmm3
1221; SSE-NEXT:    retq
1222;
1223; AVX-LABEL: commute_cmppd_one_zmmm:
1224; AVX:       # %bb.0:
1225; AVX-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
1226; AVX-NEXT:    vcmpneq_oqpd 32(%rdi), %ymm1, %ymm1
1227; AVX-NEXT:    retq
1228;
1229; AVX512-LABEL: commute_cmppd_one_zmmm:
1230; AVX512:       # %bb.0:
1231; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %zmm0, %k0
1232; AVX512-NEXT:    vpmovm2q %k0, %zmm0
1233; AVX512-NEXT:    retq
1234  %1 = load <8 x double>, <8 x double>* %a0
1235  %2 = fcmp one <8 x double> %1, %a1
1236  %3 = sext <8 x i1> %2 to <8 x i64>
1237  ret <8 x i64> %3
1238}
1239
; 512-bit 'fcmp olt' on <8 x double> with the loaded value as the LHS operand.
; SSE and AVX checks: the load stays a separate movapd/vmovapd and feeds
; cmpltpd/vcmpltpd as a register operand (SSE additionally copies the results
; back into %xmm0-%xmm3). AVX512 checks: the load is folded as the memory
; operand of vcmpgtpd, i.e. the compare is emitted with the 'gt' predicate
; rather than 'lt', and the %k0 mask is expanded with vpmovm2q.
1240define <8 x i64> @commute_cmppd_lt_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1241; SSE-LABEL: commute_cmppd_lt_zmmm:
1242; SSE:       # %bb.0:
1243; SSE-NEXT:    movapd (%rdi), %xmm4
1244; SSE-NEXT:    movapd 16(%rdi), %xmm5
1245; SSE-NEXT:    movapd 32(%rdi), %xmm6
1246; SSE-NEXT:    movapd 48(%rdi), %xmm7
1247; SSE-NEXT:    cmpltpd %xmm0, %xmm4
1248; SSE-NEXT:    cmpltpd %xmm1, %xmm5
1249; SSE-NEXT:    cmpltpd %xmm2, %xmm6
1250; SSE-NEXT:    cmpltpd %xmm3, %xmm7
1251; SSE-NEXT:    movapd %xmm4, %xmm0
1252; SSE-NEXT:    movapd %xmm5, %xmm1
1253; SSE-NEXT:    movapd %xmm6, %xmm2
1254; SSE-NEXT:    movapd %xmm7, %xmm3
1255; SSE-NEXT:    retq
1256;
1257; AVX-LABEL: commute_cmppd_lt_zmmm:
1258; AVX:       # %bb.0:
1259; AVX-NEXT:    vmovapd (%rdi), %ymm2
1260; AVX-NEXT:    vmovapd 32(%rdi), %ymm3
1261; AVX-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
1262; AVX-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
1263; AVX-NEXT:    retq
1264;
1265; AVX512-LABEL: commute_cmppd_lt_zmmm:
1266; AVX512:       # %bb.0:
1267; AVX512-NEXT:    vcmpgtpd (%rdi), %zmm0, %k0
1268; AVX512-NEXT:    vpmovm2q %k0, %zmm0
1269; AVX512-NEXT:    retq
1270  %1 = load <8 x double>, <8 x double>* %a0
1271  %2 = fcmp olt <8 x double> %1, %a1
1272  %3 = sext <8 x i1> %2 to <8 x i64>
1273  ret <8 x i64> %3
1274}
1275
; 512-bit 'fcmp ole' on <8 x double> with the loaded value as the LHS operand.
; SSE and AVX checks: the load stays a separate movapd/vmovapd and feeds
; cmplepd/vcmplepd as a register operand (SSE additionally copies the results
; back into %xmm0-%xmm3). AVX512 checks: the load is folded as the memory
; operand of vcmpgepd, i.e. the compare is emitted with the 'ge' predicate
; rather than 'le', and the %k0 mask is expanded with vpmovm2q.
1276define <8 x i64> @commute_cmppd_le_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1277; SSE-LABEL: commute_cmppd_le_zmmm:
1278; SSE:       # %bb.0:
1279; SSE-NEXT:    movapd (%rdi), %xmm4
1280; SSE-NEXT:    movapd 16(%rdi), %xmm5
1281; SSE-NEXT:    movapd 32(%rdi), %xmm6
1282; SSE-NEXT:    movapd 48(%rdi), %xmm7
1283; SSE-NEXT:    cmplepd %xmm0, %xmm4
1284; SSE-NEXT:    cmplepd %xmm1, %xmm5
1285; SSE-NEXT:    cmplepd %xmm2, %xmm6
1286; SSE-NEXT:    cmplepd %xmm3, %xmm7
1287; SSE-NEXT:    movapd %xmm4, %xmm0
1288; SSE-NEXT:    movapd %xmm5, %xmm1
1289; SSE-NEXT:    movapd %xmm6, %xmm2
1290; SSE-NEXT:    movapd %xmm7, %xmm3
1291; SSE-NEXT:    retq
1292;
1293; AVX-LABEL: commute_cmppd_le_zmmm:
1294; AVX:       # %bb.0:
1295; AVX-NEXT:    vmovapd (%rdi), %ymm2
1296; AVX-NEXT:    vmovapd 32(%rdi), %ymm3
1297; AVX-NEXT:    vcmplepd %ymm0, %ymm2, %ymm0
1298; AVX-NEXT:    vcmplepd %ymm1, %ymm3, %ymm1
1299; AVX-NEXT:    retq
1300;
1301; AVX512-LABEL: commute_cmppd_le_zmmm:
1302; AVX512:       # %bb.0:
1303; AVX512-NEXT:    vcmpgepd (%rdi), %zmm0, %k0
1304; AVX512-NEXT:    vpmovm2q %k0, %zmm0
1305; AVX512-NEXT:    retq
1306  %1 = load <8 x double>, <8 x double>* %a0
1307  %2 = fcmp ole <8 x double> %1, %a1
1308  %3 = sext <8 x i1> %2 to <8 x i64>
1309  ret <8 x i64> %3
1310}
1311