; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

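; The scalar vfrcz intrinsics only compute the low element of their result; as
; the checks below show, the remaining lanes come straight from the source
; operand, so inserts into lanes that never reach the extracted result fold
; away, and extracts of pass-through lanes fold to the inserted constants.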
define double @test_vfrcz_sd_0(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT:    ret double [[TMP3]]
;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
  %4 = extractelement <2 x double> %3, i32 0
  ret double %4
}

define double @test_vfrcz_sd_1(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_1(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
  %4 = extractelement <2 x double> %3, i32 1
  ret double %4
}

define float @test_vfrcz_ss_0(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

define float @test_vfrcz_ss_3(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_3(
; CHECK-NEXT:    ret float 3.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 3
  ret float %6
}

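; The XOP vpcom* comparison intrinsics with ordinary predicates should be
; canonicalized to a generic icmp of the matching predicate followed by a sign
; extension back to the original vector type.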
define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_slt_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ult_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_sle_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ule_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sgt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_ugt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sge_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_uge_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_seq_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_ueq_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_sne_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_une_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

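; The always-true and always-false vpcom predicates ignore their operands and
; fold directly to all-ones and all-zeros constants.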
define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_strue_v16i8(
; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_utrue_v16i8(
; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_sfalse_v16i8(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_ufalse_v16i8(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone