; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

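; For these commutable predicates the fcmp operands can be swapped, so the load
; is folded straight into the compare's memory operand. The lt/le predicates
; below cannot be swapped without changing the predicate, so the load stays in
; a separate movaps/vmovaps and the compare uses the register form.
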
define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_eq
  ;SSE:       cmpeqps (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_eq
  ;AVX:       vcmpeqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ne
  ;SSE:       cmpneqps (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_ne
  ;AVX:       vcmpneqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ord
  ;SSE:       cmpordps (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_ord
  ;AVX:       vcmpordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_uno
  ;SSE:       cmpunordps (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_uno
  ;AVX:       vcmpunordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_lt
  ;SSE:       movaps (%rdi), %xmm1
  ;SSE-NEXT:  cmpltps %xmm0, %xmm1
  ;SSE-NEXT:  movaps %xmm1, %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_lt
  ;AVX:       vmovaps (%rdi), %xmm1
  ;AVX-NEXT:  vcmpltps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_le
  ;SSE:       movaps (%rdi), %xmm1
  ;SSE-NEXT:  cmpleps %xmm0, %xmm1
  ;SSE-NEXT:  movaps %xmm1, %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_le
  ;AVX:       vmovaps (%rdi), %xmm1
  ;AVX-NEXT:  vcmpleps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_eq_ymm
  ;AVX:       vcmpeqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ne_ymm
  ;AVX:       vcmpneqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ord_ymm
  ;AVX:       vcmpordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_uno_ymm
  ;AVX:       vcmpunordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_lt_ymm
  ;AVX:       vmovaps (%rdi), %ymm1
  ;AVX-NEXT:  vcmpltps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_le_ymm
  ;AVX:       vmovaps (%rdi), %ymm1
  ;AVX-NEXT:  vcmpleps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

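; As with the float tests above, only the commutable predicates fold the load;
; lt/le load into a register first (movapd/vmovapd) and compare register-to-register.
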
define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_eq
  ;SSE:       cmpeqpd (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_eq
  ;AVX:       vcmpeqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ne
  ;SSE:       cmpneqpd (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_ne
  ;AVX:       vcmpneqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ord
  ;SSE:       cmpordpd (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_ord
  ;AVX:       vcmpordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_uno
  ;SSE:       cmpunordpd (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_uno
  ;AVX:       vcmpunordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_lt
  ;SSE:       movapd (%rdi), %xmm1
  ;SSE-NEXT:  cmpltpd %xmm0, %xmm1
  ;SSE-NEXT:  movapd %xmm1, %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_lt
  ;AVX:       vmovapd (%rdi), %xmm1
  ;AVX-NEXT:  vcmpltpd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_le
  ;SSE:       movapd (%rdi), %xmm1
  ;SSE-NEXT:  cmplepd %xmm0, %xmm1
  ;SSE-NEXT:  movapd %xmm1, %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_le
  ;AVX:       vmovapd (%rdi), %xmm1
  ;AVX-NEXT:  vcmplepd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_eq_ymm
  ;AVX:       vcmpeqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ne_ymm
  ;AVX:       vcmpneqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ord_ymm
  ;AVX:       vcmpordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_uno_ymm
  ;AVX:       vcmpunordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_lt_ymm
  ;AVX:       vmovapd (%rdi), %ymm1
  ;AVX-NEXT:  vcmpltpd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_le_ymm
  ;AVX:       vmovapd (%rdi), %ymm1
  ;AVX-NEXT:  vcmplepd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}