; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s

; llvm.x86.xop.vpermil2pd with immediate 1 and all vector operands in registers.
define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $1, %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 1)
  ret <2 x double> %res
}
; Second argument is loaded through %a1; the expected assembly uses the pointer
; directly as a memory operand instead of emitting a separate load.
define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $1, %xmm1, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %a1
  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x i64> %a2, i8 1)
  ret <2 x double> %res
}
; Third argument is loaded through %a2; the expected assembly uses the pointer
; directly as a memory operand instead of emitting a separate load.
define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x i64>* %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $1, (%rdi), %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %vec = load <2 x i64>, <2 x i64>* %a2
  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %vec, i8 1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone

; llvm.x86.xop.vpermil2pd.256 with immediate 2 and all vector operands in registers.
define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $2, %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 2)
  ret <4 x double> %res
}
; Second argument is loaded through %a1; the expected assembly uses the pointer
; directly as a memory operand instead of emitting a separate load.
define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $2, %ymm1, (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %a1
  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x i64> %a2, i8 2)
  ret <4 x double> %res
}
; Third argument is loaded through %a2; the expected assembly uses the pointer
; directly as a memory operand instead of emitting a separate load.
define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x i64>* %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $2, (%rdi), %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, <4 x i64>* %a2
  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %vec, i8 2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone

; llvm.x86.xop.vpermil2ps with immediate 3 and all vector operands in registers.
define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2, i8 3)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone

; llvm.x86.xop.vpermil2ps.256 with immediate 4 and all vector operands in registers.
define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2ps $4, %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2, i8 4)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone

; Generic IR computing (%a0 & %a2) | (%a1 & ~%a2) on 128-bit vectors; the
; expected assembly is a single vpcmov.
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %2 = and <2 x i64> %a0, %a2
  %3 = and <2 x i64> %a1, %1
  %4 = or <2 x i64> %2, %3
  ret <2 x i64> %4
}

; Generic IR computing (%a0 & %a2) | (%a1 & ~%a2) on 256-bit vectors; the
; expected assembly is a single ymm vpcmov.
define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %2 = and <4 x i64> %a0, %a2
  %3 = and <4 x i64> %a1, %1
  %4 = or <4 x i64> %2, %3
  ret <4 x i64> %4
}
; Same select pattern as the register form, but the value and-ed with the
; inverted mask is loaded through %a1; the expected vpcmov takes it as a
; memory operand.
define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov_256_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmov %ymm1, (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, <4 x i64>* %a1
  %1 = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %2 = and <4 x i64> %a0, %a2
  %3 = and <4 x i64> %vec, %1
  %4 = or <4 x i64> %2, %3
  ret <4 x i64> %4
}
; Same select pattern as the register form, but the mask is loaded through
; %a2; the expected vpcmov takes the mask as a memory operand.
define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov_256_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmov (%rdi), %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %vec = load <4 x i64>, <4 x i64>* %a2
  %1 = xor <4 x i64> %vec, <i64 -1, i64 -1, i64 -1, i64 -1>
  %2 = and <4 x i64> %a0, %vec
  %3 = and <4 x i64> %a1, %1
  %4 = or <4 x i64> %2, %3
  ret <4 x i64> %4
}

; llvm.x86.xop.vphaddbd on a register operand; expected to select one vphaddbd.
define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddbd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddbd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone

; llvm.x86.xop.vphaddbq on a register operand; expected to select one vphaddbq.
define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddbq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddbq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone

; llvm.x86.xop.vphaddbw on a register operand; expected to select one vphaddbw.
define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddbw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone

; llvm.x86.xop.vphadddq on a register operand; expected to select one vphadddq.
define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphadddq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphadddq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone

; llvm.x86.xop.vphaddubd on a register operand; expected to select one vphaddubd.
define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddubd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddubd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone

; llvm.x86.xop.vphaddubq on a register operand; expected to select one vphaddubq.
define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddubq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddubq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone

; llvm.x86.xop.vphaddubw on a register operand; expected to select one vphaddubw.
define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddubw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddubw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone

; llvm.x86.xop.vphaddudq on a register operand; expected to select one vphaddudq.
define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddudq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddudq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone

; llvm.x86.xop.vphadduwd on a register operand; expected to select one vphadduwd.
define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphadduwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphadduwd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone

; llvm.x86.xop.vphadduwq on a register operand; expected to select one vphadduwq.
define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphadduwq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphadduwq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone

; llvm.x86.xop.vphaddwd on a register operand; expected to select one vphaddwd.
define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddwd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone

; llvm.x86.xop.vphaddwq on a register operand; expected to select one vphaddwq.
define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddwq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddwq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone

; llvm.x86.xop.vphsubbw on a register operand; expected to select one vphsubbw.
define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubbw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone

; llvm.x86.xop.vphsubdq on a register operand; expected to select one vphsubdq.
define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubdq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0)
  ret <2 x i64> %res
}
; Operand is loaded through %a0; the expected assembly uses the memory form of
; vphsubdq rather than a separate load.
define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubdq_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubdq (%rdi), %xmm0
; CHECK-NEXT:    retq
  %vec = load <4 x i32>, <4 x i32>* %a0
  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone

; llvm.x86.xop.vphsubwd on a register operand; expected to select one vphsubwd.
define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubwd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
; Operand is loaded through %a0; the expected assembly uses the memory form of
; vphsubwd rather than a separate load.
define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubwd_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubwd (%rdi), %xmm0
; CHECK-NEXT:    retq
  %vec = load <8 x i16>, <8 x i16>* %a0
  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone

; llvm.x86.xop.vpmacsdd with three register operands; expected to select one vpmacsdd.
define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsdd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

; llvm.x86.xop.vpmacsdqh with three register operands; expected to select one vpmacsdqh.
define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsdqh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

; llvm.x86.xop.vpmacsdql with three register operands; expected to select one vpmacsdql.
define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsdql:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

; llvm.x86.xop.vpmacssdd with three register operands; expected to select one vpmacssdd.
define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacssdd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

; llvm.x86.xop.vpmacssdqh with three register operands; expected to select one vpmacssdqh.
define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacssdqh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

; llvm.x86.xop.vpmacssdql with three register operands; expected to select one vpmacssdql.
define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacssdql:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

; llvm.x86.xop.vpmacsswd with three register operands; expected to select one vpmacsswd.
define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsswd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

; llvm.x86.xop.vpmacssww with three register operands; expected to select one vpmacssww.
define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacssww:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.xop.vpmacswd with three register operands; expected to select one vpmacswd.
define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacswd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

; llvm.x86.xop.vpmacsww with three register operands; expected to select one vpmacsww.
define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsww:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.xop.vpmadcsswd with three register operands; expected to select one vpmadcsswd.
define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmadcsswd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

; llvm.x86.xop.vpmadcswd with three register operands; expected to select one vpmadcswd.
define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmadcswd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
; Second argument is loaded through %a1; the expected assembly uses the pointer
; directly as a memory operand instead of emitting a separate load.
define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmadcswd_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %vec = load <8 x i16>, <8 x i16>* %a1
  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

; llvm.x86.xop.vpperm with three register operands; expected to select one vpperm.
define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpperm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
  ret <16 x i8> %res
}
; Third argument is loaded through %a2; the expected assembly uses the pointer
; directly as a memory operand instead of emitting a separate load.
define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
; CHECK-LABEL: test_int_x86_xop_vpperm_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpperm (%rdi), %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %a2
  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec)
  ret <16 x i8> %res
}
; Second argument is loaded through %a1; the expected assembly uses the pointer
; directly as a memory operand instead of emitting a separate load.
define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpperm_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpperm %xmm1, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %vec = load <16 x i8>, <16 x i8>* %a1
  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

; llvm.x86.xop.vpshab with two register operands; expected to select one vpshab.
define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshab:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshab %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone

; llvm.x86.xop.vpshad with two register operands; expected to select one vpshad.
define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshad:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshad %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.x86.xop.vpshaq with two register operands; expected to select one vpshaq.
define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshaq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone

; llvm.x86.xop.vpshaw with two register operands; expected to select one vpshaw.
define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshaw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.xop.vpshlb with two register operands; expected to select one vpshlb.
define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone

; llvm.x86.xop.vpshld with two register operands; expected to select one vpshld.
define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshld:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.x86.xop.vpshlq with two register operands; expected to select one vpshlq.
define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone

; llvm.x86.xop.vpshlw with two register operands; expected to select one vpshlw.
define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
; Second argument is loaded through %a1; the expected assembly uses the pointer
; directly as a memory operand instead of emitting a separate load.
define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlw_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlw (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %vec = load <8 x i16>, <8 x i16>* %a1
  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec)
  ret <8 x i16> %res
}
; First argument is loaded through %a0; the expected assembly uses the pointer
; directly as a memory operand instead of emitting a separate load.
define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlw_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlw %xmm0, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %vec = load <8 x i16>, <8 x i16>* %a0
  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.xop.vfrcz.ss on a register operand; expected to select one vfrczss.
define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0)
  ret <4 x float> %res
}
; A scalar float is loaded and inserted into element 0 of an undef vector; the
; expected assembly uses the memory form of vfrczss rather than a separate load.
define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ss_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczss (%rdi), %xmm0
; CHECK-NEXT:    retq
  %elem = load float, float* %a0
  %vec = insertelement <4 x float> undef, float %elem, i32 0
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone

; llvm.x86.xop.vfrcz.sd on a register operand; expected to select one vfrczsd.
define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczsd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0)
  ret <2 x double> %res
}
; A scalar double is loaded and inserted into element 0 of an undef vector; the
; expected assembly uses the memory form of vfrczsd rather than a separate load.
define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_sd_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczsd (%rdi), %xmm0
; CHECK-NEXT:    retq
  %elem = load double, double* %a0
  %vec = insertelement <2 x double> undef, double %elem, i32 0
  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone

; llvm.x86.xop.vfrcz.pd on a register operand; expected to select one xmm vfrczpd.
define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczpd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0)
  ret <2 x double> %res
}
; Operand is loaded through %a0; the expected assembly uses the memory form of
; vfrczpd rather than a separate load.
define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczpd (%rdi), %xmm0
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %a0
  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone

; llvm.x86.xop.vfrcz.pd.256 on a register operand; expected to select one ymm vfrczpd.
define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczpd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0)
  ret <4 x double> %res
}
; Operand is loaded through %a0; the expected assembly uses the memory form of
; the ymm vfrczpd rather than a separate load.
define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczpd (%rdi), %ymm0
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %a0
  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone

; llvm.x86.xop.vfrcz.ps on a register operand; expected to select one xmm vfrczps.
define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0)
  ret <4 x float> %res
}
; Operand is loaded through %a0; the expected assembly uses the memory form of
; vfrczps rather than a separate load.
define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczps (%rdi), %xmm0
; CHECK-NEXT:    retq
  %vec = load <4 x float>, <4 x float>* %a0
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone

; llvm.x86.xop.vfrcz.ps.256 on a register operand; expected to select one ymm vfrczps.
define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0)
  ret <8 x float> %res
}
; Operand is loaded through %a0; the expected assembly uses the memory form of
; the ymm vfrczps rather than a separate load.
define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczps (%rdi), %ymm0
; CHECK-NEXT:    retq
  %vec = load <8 x float>, <8 x float>* %a0
  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
