1; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
2
3declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
4
5define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
6; CHECK: phaddw
7entry:
8  %0 = bitcast <1 x i64> %b to <4 x i16>
9  %1 = bitcast <1 x i64> %a to <4 x i16>
10  %2 = bitcast <4 x i16> %1 to x86_mmx
11  %3 = bitcast <4 x i16> %0 to x86_mmx
12  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
13  %5 = bitcast x86_mmx %4 to <4 x i16>
14  %6 = bitcast <4 x i16> %5 to <1 x i64>
15  %7 = extractelement <1 x i64> %6, i32 0
16  ret i64 %7
17}
18
19declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
20
21define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
22; CHECK: pcmpgtd
23entry:
24  %0 = bitcast <1 x i64> %b to <2 x i32>
25  %1 = bitcast <1 x i64> %a to <2 x i32>
26  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
27  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
28  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
29  %3 = bitcast x86_mmx %2 to <2 x i32>
30  %4 = bitcast <2 x i32> %3 to <1 x i64>
31  %5 = extractelement <1 x i64> %4, i32 0
32  ret i64 %5
33}
34
35declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
36
37define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
38; CHECK: pcmpgtw
39entry:
40  %0 = bitcast <1 x i64> %b to <4 x i16>
41  %1 = bitcast <1 x i64> %a to <4 x i16>
42  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
43  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
44  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
45  %3 = bitcast x86_mmx %2 to <4 x i16>
46  %4 = bitcast <4 x i16> %3 to <1 x i64>
47  %5 = extractelement <1 x i64> %4, i32 0
48  ret i64 %5
49}
50
51declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
52
53define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
54; CHECK: pcmpgtb
55entry:
56  %0 = bitcast <1 x i64> %b to <8 x i8>
57  %1 = bitcast <1 x i64> %a to <8 x i8>
58  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
59  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
60  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
61  %3 = bitcast x86_mmx %2 to <8 x i8>
62  %4 = bitcast <8 x i8> %3 to <1 x i64>
63  %5 = extractelement <1 x i64> %4, i32 0
64  ret i64 %5
65}
66
67declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
68
69define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
70; CHECK: pcmpeqd
71entry:
72  %0 = bitcast <1 x i64> %b to <2 x i32>
73  %1 = bitcast <1 x i64> %a to <2 x i32>
74  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
75  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
76  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
77  %3 = bitcast x86_mmx %2 to <2 x i32>
78  %4 = bitcast <2 x i32> %3 to <1 x i64>
79  %5 = extractelement <1 x i64> %4, i32 0
80  ret i64 %5
81}
82
83declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
84
85define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
86; CHECK: pcmpeqw
87entry:
88  %0 = bitcast <1 x i64> %b to <4 x i16>
89  %1 = bitcast <1 x i64> %a to <4 x i16>
90  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
91  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
92  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
93  %3 = bitcast x86_mmx %2 to <4 x i16>
94  %4 = bitcast <4 x i16> %3 to <1 x i64>
95  %5 = extractelement <1 x i64> %4, i32 0
96  ret i64 %5
97}
98
99declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
100
101define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
102; CHECK: pcmpeqb
103entry:
104  %0 = bitcast <1 x i64> %b to <8 x i8>
105  %1 = bitcast <1 x i64> %a to <8 x i8>
106  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
107  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
108  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
109  %3 = bitcast x86_mmx %2 to <8 x i8>
110  %4 = bitcast <8 x i8> %3 to <1 x i64>
111  %5 = extractelement <1 x i64> %4, i32 0
112  ret i64 %5
113}
114
115declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
116
117define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
118; CHECK: punpckldq
119entry:
120  %0 = bitcast <1 x i64> %b to <2 x i32>
121  %1 = bitcast <1 x i64> %a to <2 x i32>
122  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
123  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
124  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
125  %3 = bitcast x86_mmx %2 to <2 x i32>
126  %4 = bitcast <2 x i32> %3 to <1 x i64>
127  %5 = extractelement <1 x i64> %4, i32 0
128  ret i64 %5
129}
130
131declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
132
133define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
134; CHECK: punpcklwd
135entry:
136  %0 = bitcast <1 x i64> %b to <4 x i16>
137  %1 = bitcast <1 x i64> %a to <4 x i16>
138  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
139  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
140  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
141  %3 = bitcast x86_mmx %2 to <4 x i16>
142  %4 = bitcast <4 x i16> %3 to <1 x i64>
143  %5 = extractelement <1 x i64> %4, i32 0
144  ret i64 %5
145}
146
147declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
148
149define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
150; CHECK: punpcklbw
151entry:
152  %0 = bitcast <1 x i64> %b to <8 x i8>
153  %1 = bitcast <1 x i64> %a to <8 x i8>
154  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
155  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
156  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
157  %3 = bitcast x86_mmx %2 to <8 x i8>
158  %4 = bitcast <8 x i8> %3 to <1 x i64>
159  %5 = extractelement <1 x i64> %4, i32 0
160  ret i64 %5
161}
162
163declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
164
165define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
166; CHECK: punpckhdq
167entry:
168  %0 = bitcast <1 x i64> %b to <2 x i32>
169  %1 = bitcast <1 x i64> %a to <2 x i32>
170  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
171  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
172  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
173  %3 = bitcast x86_mmx %2 to <2 x i32>
174  %4 = bitcast <2 x i32> %3 to <1 x i64>
175  %5 = extractelement <1 x i64> %4, i32 0
176  ret i64 %5
177}
178
179declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
180
181define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
182; CHECK: punpckhwd
183entry:
184  %0 = bitcast <1 x i64> %b to <4 x i16>
185  %1 = bitcast <1 x i64> %a to <4 x i16>
186  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
187  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
188  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
189  %3 = bitcast x86_mmx %2 to <4 x i16>
190  %4 = bitcast <4 x i16> %3 to <1 x i64>
191  %5 = extractelement <1 x i64> %4, i32 0
192  ret i64 %5
193}
194
195declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
196
197define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
198; CHECK: punpckhbw
199entry:
200  %0 = bitcast <1 x i64> %b to <8 x i8>
201  %1 = bitcast <1 x i64> %a to <8 x i8>
202  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
203  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
204  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
205  %3 = bitcast x86_mmx %2 to <8 x i8>
206  %4 = bitcast <8 x i8> %3 to <1 x i64>
207  %5 = extractelement <1 x i64> %4, i32 0
208  ret i64 %5
209}
210
211declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
212
213define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
214; CHECK: packuswb
215entry:
216  %0 = bitcast <1 x i64> %b to <4 x i16>
217  %1 = bitcast <1 x i64> %a to <4 x i16>
218  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
219  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
220  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
221  %3 = bitcast x86_mmx %2 to <8 x i8>
222  %4 = bitcast <8 x i8> %3 to <1 x i64>
223  %5 = extractelement <1 x i64> %4, i32 0
224  ret i64 %5
225}
226
227declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
228
229define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
230; CHECK: packssdw
231entry:
232  %0 = bitcast <1 x i64> %b to <2 x i32>
233  %1 = bitcast <1 x i64> %a to <2 x i32>
234  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
235  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
236  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
237  %3 = bitcast x86_mmx %2 to <4 x i16>
238  %4 = bitcast <4 x i16> %3 to <1 x i64>
239  %5 = extractelement <1 x i64> %4, i32 0
240  ret i64 %5
241}
242
243declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
244
245define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
246; CHECK: packsswb
247entry:
248  %0 = bitcast <1 x i64> %b to <4 x i16>
249  %1 = bitcast <1 x i64> %a to <4 x i16>
250  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
251  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
252  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
253  %3 = bitcast x86_mmx %2 to <8 x i8>
254  %4 = bitcast <8 x i8> %3 to <1 x i64>
255  %5 = extractelement <1 x i64> %4, i32 0
256  ret i64 %5
257}
258
259declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
260
261define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
262; CHECK: psrad
263entry:
264  %0 = bitcast <1 x i64> %a to <2 x i32>
265  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
266  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
267  %2 = bitcast x86_mmx %1 to <2 x i32>
268  %3 = bitcast <2 x i32> %2 to <1 x i64>
269  %4 = extractelement <1 x i64> %3, i32 0
270  ret i64 %4
271}
272
273declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
274
275define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
276; CHECK: psraw
277entry:
278  %0 = bitcast <1 x i64> %a to <4 x i16>
279  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
280  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
281  %2 = bitcast x86_mmx %1 to <4 x i16>
282  %3 = bitcast <4 x i16> %2 to <1 x i64>
283  %4 = extractelement <1 x i64> %3, i32 0
284  ret i64 %4
285}
286
287declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
288
289define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
290; CHECK: psrlq
291entry:
292  %0 = extractelement <1 x i64> %a, i32 0
293  %mmx_var.i = bitcast i64 %0 to x86_mmx
294  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
295  %2 = bitcast x86_mmx %1 to i64
296  ret i64 %2
297}
298
299declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
300
301define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
302; CHECK: psrld
303entry:
304  %0 = bitcast <1 x i64> %a to <2 x i32>
305  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
306  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
307  %2 = bitcast x86_mmx %1 to <2 x i32>
308  %3 = bitcast <2 x i32> %2 to <1 x i64>
309  %4 = extractelement <1 x i64> %3, i32 0
310  ret i64 %4
311}
312
313declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
314
315define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
316; CHECK: psrlw
317entry:
318  %0 = bitcast <1 x i64> %a to <4 x i16>
319  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
320  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
321  %2 = bitcast x86_mmx %1 to <4 x i16>
322  %3 = bitcast <4 x i16> %2 to <1 x i64>
323  %4 = extractelement <1 x i64> %3, i32 0
324  ret i64 %4
325}
326
327declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
328
329define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
330; CHECK: psllq
331entry:
332  %0 = extractelement <1 x i64> %a, i32 0
333  %mmx_var.i = bitcast i64 %0 to x86_mmx
334  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
335  %2 = bitcast x86_mmx %1 to i64
336  ret i64 %2
337}
338
339declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
340
341define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
342; CHECK: pslld
343entry:
344  %0 = bitcast <1 x i64> %a to <2 x i32>
345  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
346  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
347  %2 = bitcast x86_mmx %1 to <2 x i32>
348  %3 = bitcast <2 x i32> %2 to <1 x i64>
349  %4 = extractelement <1 x i64> %3, i32 0
350  ret i64 %4
351}
352
353declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
354
355define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
356; CHECK: psllw
357entry:
358  %0 = bitcast <1 x i64> %a to <4 x i16>
359  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
360  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
361  %2 = bitcast x86_mmx %1 to <4 x i16>
362  %3 = bitcast <4 x i16> %2 to <1 x i64>
363  %4 = extractelement <1 x i64> %3, i32 0
364  ret i64 %4
365}
366
367declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
368
369define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
370; CHECK: psrad
371entry:
372  %0 = bitcast <1 x i64> %a to <2 x i32>
373  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
374  %1 = extractelement <1 x i64> %b, i32 0
375  %mmx_var1.i = bitcast i64 %1 to x86_mmx
376  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
377  %3 = bitcast x86_mmx %2 to <2 x i32>
378  %4 = bitcast <2 x i32> %3 to <1 x i64>
379  %5 = extractelement <1 x i64> %4, i32 0
380  ret i64 %5
381}
382
383declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
384
385define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
386; CHECK: psraw
387entry:
388  %0 = bitcast <1 x i64> %a to <4 x i16>
389  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
390  %1 = extractelement <1 x i64> %b, i32 0
391  %mmx_var1.i = bitcast i64 %1 to x86_mmx
392  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
393  %3 = bitcast x86_mmx %2 to <4 x i16>
394  %4 = bitcast <4 x i16> %3 to <1 x i64>
395  %5 = extractelement <1 x i64> %4, i32 0
396  ret i64 %5
397}
398
399declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
400
401define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
402; CHECK: psrlq
403entry:
404  %0 = extractelement <1 x i64> %a, i32 0
405  %mmx_var.i = bitcast i64 %0 to x86_mmx
406  %1 = extractelement <1 x i64> %b, i32 0
407  %mmx_var1.i = bitcast i64 %1 to x86_mmx
408  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
409  %3 = bitcast x86_mmx %2 to i64
410  ret i64 %3
411}
412
413declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
414
415define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
416; CHECK: psrld
417entry:
418  %0 = bitcast <1 x i64> %a to <2 x i32>
419  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
420  %1 = extractelement <1 x i64> %b, i32 0
421  %mmx_var1.i = bitcast i64 %1 to x86_mmx
422  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
423  %3 = bitcast x86_mmx %2 to <2 x i32>
424  %4 = bitcast <2 x i32> %3 to <1 x i64>
425  %5 = extractelement <1 x i64> %4, i32 0
426  ret i64 %5
427}
428
429declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
430
431define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
432; CHECK: psrlw
433entry:
434  %0 = bitcast <1 x i64> %a to <4 x i16>
435  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
436  %1 = extractelement <1 x i64> %b, i32 0
437  %mmx_var1.i = bitcast i64 %1 to x86_mmx
438  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
439  %3 = bitcast x86_mmx %2 to <4 x i16>
440  %4 = bitcast <4 x i16> %3 to <1 x i64>
441  %5 = extractelement <1 x i64> %4, i32 0
442  ret i64 %5
443}
444
445declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
446
447define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
448; CHECK: psllq
449entry:
450  %0 = extractelement <1 x i64> %a, i32 0
451  %mmx_var.i = bitcast i64 %0 to x86_mmx
452  %1 = extractelement <1 x i64> %b, i32 0
453  %mmx_var1.i = bitcast i64 %1 to x86_mmx
454  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
455  %3 = bitcast x86_mmx %2 to i64
456  ret i64 %3
457}
458
459declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
460
461define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
462; CHECK: pslld
463entry:
464  %0 = bitcast <1 x i64> %a to <2 x i32>
465  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
466  %1 = extractelement <1 x i64> %b, i32 0
467  %mmx_var1.i = bitcast i64 %1 to x86_mmx
468  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
469  %3 = bitcast x86_mmx %2 to <2 x i32>
470  %4 = bitcast <2 x i32> %3 to <1 x i64>
471  %5 = extractelement <1 x i64> %4, i32 0
472  ret i64 %5
473}
474
475declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
476
477define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
478; CHECK: psllw
479entry:
480  %0 = bitcast <1 x i64> %a to <4 x i16>
481  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
482  %1 = extractelement <1 x i64> %b, i32 0
483  %mmx_var1.i = bitcast i64 %1 to x86_mmx
484  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
485  %3 = bitcast x86_mmx %2 to <4 x i16>
486  %4 = bitcast <4 x i16> %3 to <1 x i64>
487  %5 = extractelement <1 x i64> %4, i32 0
488  ret i64 %5
489}
490
491declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
492
493define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
494; CHECK: pxor
495entry:
496  %0 = bitcast <1 x i64> %b to <2 x i32>
497  %1 = bitcast <1 x i64> %a to <2 x i32>
498  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
499  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
500  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
501  %3 = bitcast x86_mmx %2 to <2 x i32>
502  %4 = bitcast <2 x i32> %3 to <1 x i64>
503  %5 = extractelement <1 x i64> %4, i32 0
504  ret i64 %5
505}
506
507declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
508
509define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
510; CHECK: por
511entry:
512  %0 = bitcast <1 x i64> %b to <2 x i32>
513  %1 = bitcast <1 x i64> %a to <2 x i32>
514  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
515  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
516  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
517  %3 = bitcast x86_mmx %2 to <2 x i32>
518  %4 = bitcast <2 x i32> %3 to <1 x i64>
519  %5 = extractelement <1 x i64> %4, i32 0
520  ret i64 %5
521}
522
523declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
524
525define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
526; CHECK: pandn
527entry:
528  %0 = bitcast <1 x i64> %b to <2 x i32>
529  %1 = bitcast <1 x i64> %a to <2 x i32>
530  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
531  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
532  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
533  %3 = bitcast x86_mmx %2 to <2 x i32>
534  %4 = bitcast <2 x i32> %3 to <1 x i64>
535  %5 = extractelement <1 x i64> %4, i32 0
536  ret i64 %5
537}
538
539declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
540
541define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
542; CHECK: pand
543entry:
544  %0 = bitcast <1 x i64> %b to <2 x i32>
545  %1 = bitcast <1 x i64> %a to <2 x i32>
546  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
547  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
548  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
549  %3 = bitcast x86_mmx %2 to <2 x i32>
550  %4 = bitcast <2 x i32> %3 to <1 x i64>
551  %5 = extractelement <1 x i64> %4, i32 0
552  ret i64 %5
553}
554
555declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
556
557define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
558; CHECK: pmullw
559entry:
560  %0 = bitcast <1 x i64> %b to <4 x i16>
561  %1 = bitcast <1 x i64> %a to <4 x i16>
562  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
563  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
564  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
565  %3 = bitcast x86_mmx %2 to <4 x i16>
566  %4 = bitcast <4 x i16> %3 to <1 x i64>
567  %5 = extractelement <1 x i64> %4, i32 0
568  ret i64 %5
569}
570
571define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
572; CHECK: pmullw
573entry:
574  %0 = bitcast <1 x i64> %b to <4 x i16>
575  %1 = bitcast <1 x i64> %a to <4 x i16>
576  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
577  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
578  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
579  %3 = bitcast x86_mmx %2 to <4 x i16>
580  %4 = bitcast <4 x i16> %3 to <1 x i64>
581  %5 = extractelement <1 x i64> %4, i32 0
582  ret i64 %5
583}
584
585declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
586
587define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
588; CHECK: pmulhw
589entry:
590  %0 = bitcast <1 x i64> %b to <4 x i16>
591  %1 = bitcast <1 x i64> %a to <4 x i16>
592  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
593  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
594  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
595  %3 = bitcast x86_mmx %2 to <4 x i16>
596  %4 = bitcast <4 x i16> %3 to <1 x i64>
597  %5 = extractelement <1 x i64> %4, i32 0
598  ret i64 %5
599}
600
601declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
602
603define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
604; CHECK: pmaddwd
605entry:
606  %0 = bitcast <1 x i64> %b to <4 x i16>
607  %1 = bitcast <1 x i64> %a to <4 x i16>
608  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
609  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
610  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
611  %3 = bitcast x86_mmx %2 to <2 x i32>
612  %4 = bitcast <2 x i32> %3 to <1 x i64>
613  %5 = extractelement <1 x i64> %4, i32 0
614  ret i64 %5
615}
616
617declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
618
619define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
620; CHECK: psubusw
621entry:
622  %0 = bitcast <1 x i64> %b to <4 x i16>
623  %1 = bitcast <1 x i64> %a to <4 x i16>
624  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
625  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
626  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
627  %3 = bitcast x86_mmx %2 to <4 x i16>
628  %4 = bitcast <4 x i16> %3 to <1 x i64>
629  %5 = extractelement <1 x i64> %4, i32 0
630  ret i64 %5
631}
632
633declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
634
635define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
636; CHECK: psubusb
637entry:
638  %0 = bitcast <1 x i64> %b to <8 x i8>
639  %1 = bitcast <1 x i64> %a to <8 x i8>
640  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
641  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
642  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
643  %3 = bitcast x86_mmx %2 to <8 x i8>
644  %4 = bitcast <8 x i8> %3 to <1 x i64>
645  %5 = extractelement <1 x i64> %4, i32 0
646  ret i64 %5
647}
648
649declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
650
651define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
652; CHECK: psubsw
653entry:
654  %0 = bitcast <1 x i64> %b to <4 x i16>
655  %1 = bitcast <1 x i64> %a to <4 x i16>
656  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
657  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
658  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
659  %3 = bitcast x86_mmx %2 to <4 x i16>
660  %4 = bitcast <4 x i16> %3 to <1 x i64>
661  %5 = extractelement <1 x i64> %4, i32 0
662  ret i64 %5
663}
664
665declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
666
667define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
668; CHECK: psubsb
669entry:
670  %0 = bitcast <1 x i64> %b to <8 x i8>
671  %1 = bitcast <1 x i64> %a to <8 x i8>
672  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
673  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
674  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
675  %3 = bitcast x86_mmx %2 to <8 x i8>
676  %4 = bitcast <8 x i8> %3 to <1 x i64>
677  %5 = extractelement <1 x i64> %4, i32 0
678  ret i64 %5
679}
680
681define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
682; CHECK: psubq
683entry:
684  %0 = extractelement <1 x i64> %a, i32 0
685  %mmx_var = bitcast i64 %0 to x86_mmx
686  %1 = extractelement <1 x i64> %b, i32 0
687  %mmx_var1 = bitcast i64 %1 to x86_mmx
688  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
689  %3 = bitcast x86_mmx %2 to i64
690  ret i64 %3
691}
692
693declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
694
695declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
696
697define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
698; CHECK: psubd
699entry:
700  %0 = bitcast <1 x i64> %b to <2 x i32>
701  %1 = bitcast <1 x i64> %a to <2 x i32>
702  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
703  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
704  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
705  %3 = bitcast x86_mmx %2 to <2 x i32>
706  %4 = bitcast <2 x i32> %3 to <1 x i64>
707  %5 = extractelement <1 x i64> %4, i32 0
708  ret i64 %5
709}
710
711declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
712
713define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
714; CHECK: psubw
715entry:
716  %0 = bitcast <1 x i64> %b to <4 x i16>
717  %1 = bitcast <1 x i64> %a to <4 x i16>
718  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
719  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
720  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
721  %3 = bitcast x86_mmx %2 to <4 x i16>
722  %4 = bitcast <4 x i16> %3 to <1 x i64>
723  %5 = extractelement <1 x i64> %4, i32 0
724  ret i64 %5
725}
726
727declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
728
729define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
730; CHECK: psubb
731entry:
732  %0 = bitcast <1 x i64> %b to <8 x i8>
733  %1 = bitcast <1 x i64> %a to <8 x i8>
734  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
735  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
736  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
737  %3 = bitcast x86_mmx %2 to <8 x i8>
738  %4 = bitcast <8 x i8> %3 to <1 x i64>
739  %5 = extractelement <1 x i64> %4, i32 0
740  ret i64 %5
741}
742
743declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
744
745define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
746; CHECK: paddusw
747entry:
748  %0 = bitcast <1 x i64> %b to <4 x i16>
749  %1 = bitcast <1 x i64> %a to <4 x i16>
750  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
751  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
752  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
753  %3 = bitcast x86_mmx %2 to <4 x i16>
754  %4 = bitcast <4 x i16> %3 to <1 x i64>
755  %5 = extractelement <1 x i64> %4, i32 0
756  ret i64 %5
757}
758
759declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
760
761define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
762; CHECK: paddusb
763entry:
764  %0 = bitcast <1 x i64> %b to <8 x i8>
765  %1 = bitcast <1 x i64> %a to <8 x i8>
766  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
767  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
768  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
769  %3 = bitcast x86_mmx %2 to <8 x i8>
770  %4 = bitcast <8 x i8> %3 to <1 x i64>
771  %5 = extractelement <1 x i64> %4, i32 0
772  ret i64 %5
773}
774
775declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
776
777define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
778; CHECK: paddsw
779entry:
780  %0 = bitcast <1 x i64> %b to <4 x i16>
781  %1 = bitcast <1 x i64> %a to <4 x i16>
782  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
783  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
784  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
785  %3 = bitcast x86_mmx %2 to <4 x i16>
786  %4 = bitcast <4 x i16> %3 to <1 x i64>
787  %5 = extractelement <1 x i64> %4, i32 0
788  ret i64 %5
789}
790
791declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
792
793define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
794; CHECK: paddsb
795entry:
796  %0 = bitcast <1 x i64> %b to <8 x i8>
797  %1 = bitcast <1 x i64> %a to <8 x i8>
798  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
799  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
800  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
801  %3 = bitcast x86_mmx %2 to <8 x i8>
802  %4 = bitcast <8 x i8> %3 to <1 x i64>
803  %5 = extractelement <1 x i64> %4, i32 0
804  ret i64 %5
805}
806
807declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
808
809define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
810; CHECK: paddq
811entry:
812  %0 = extractelement <1 x i64> %a, i32 0
813  %mmx_var = bitcast i64 %0 to x86_mmx
814  %1 = extractelement <1 x i64> %b, i32 0
815  %mmx_var1 = bitcast i64 %1 to x86_mmx
816  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
817  %3 = bitcast x86_mmx %2 to i64
818  ret i64 %3
819}
820
821declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
822
823define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
824; CHECK: paddd
825entry:
826  %0 = bitcast <1 x i64> %b to <2 x i32>
827  %1 = bitcast <1 x i64> %a to <2 x i32>
828  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
829  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
830  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
831  %3 = bitcast x86_mmx %2 to <2 x i32>
832  %4 = bitcast <2 x i32> %3 to <1 x i64>
833  %5 = extractelement <1 x i64> %4, i32 0
834  ret i64 %5
835}
836
837declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
838
839define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
840; CHECK: paddw
841entry:
842  %0 = bitcast <1 x i64> %b to <4 x i16>
843  %1 = bitcast <1 x i64> %a to <4 x i16>
844  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
845  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
846  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
847  %3 = bitcast x86_mmx %2 to <4 x i16>
848  %4 = bitcast <4 x i16> %3 to <1 x i64>
849  %5 = extractelement <1 x i64> %4, i32 0
850  ret i64 %5
851}
852
853declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
854
855define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
856; CHECK: paddb
857entry:
858  %0 = bitcast <1 x i64> %b to <8 x i8>
859  %1 = bitcast <1 x i64> %a to <8 x i8>
860  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
861  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
862  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
863  %3 = bitcast x86_mmx %2 to <8 x i8>
864  %4 = bitcast <8 x i8> %3 to <1 x i64>
865  %5 = extractelement <1 x i64> %4, i32 0
866  ret i64 %5
867}
868
869declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
870
871define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
872; CHECK: psadbw
873entry:
874  %0 = bitcast <1 x i64> %b to <8 x i8>
875  %1 = bitcast <1 x i64> %a to <8 x i8>
876  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
877  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
878  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
879  %3 = bitcast x86_mmx %2 to i64
880  ret i64 %3
881}
882
883declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
884
885define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
886; CHECK: pminsw
887entry:
888  %0 = bitcast <1 x i64> %b to <4 x i16>
889  %1 = bitcast <1 x i64> %a to <4 x i16>
890  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
891  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
892  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
893  %3 = bitcast x86_mmx %2 to <4 x i16>
894  %4 = bitcast <4 x i16> %3 to <1 x i64>
895  %5 = extractelement <1 x i64> %4, i32 0
896  ret i64 %5
897}
898
899declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
900
901define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
902; CHECK: pminub
903entry:
904  %0 = bitcast <1 x i64> %b to <8 x i8>
905  %1 = bitcast <1 x i64> %a to <8 x i8>
906  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
907  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
908  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
909  %3 = bitcast x86_mmx %2 to <8 x i8>
910  %4 = bitcast <8 x i8> %3 to <1 x i64>
911  %5 = extractelement <1 x i64> %4, i32 0
912  ret i64 %5
913}
914
915declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
916
917define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
918; CHECK: pmaxsw
919entry:
920  %0 = bitcast <1 x i64> %b to <4 x i16>
921  %1 = bitcast <1 x i64> %a to <4 x i16>
922  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
923  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
924  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
925  %3 = bitcast x86_mmx %2 to <4 x i16>
926  %4 = bitcast <4 x i16> %3 to <1 x i64>
927  %5 = extractelement <1 x i64> %4, i32 0
928  ret i64 %5
929}
930
931declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
932
933define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
934; CHECK: pmaxub
935entry:
936  %0 = bitcast <1 x i64> %b to <8 x i8>
937  %1 = bitcast <1 x i64> %a to <8 x i8>
938  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
939  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
940  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
941  %3 = bitcast x86_mmx %2 to <8 x i8>
942  %4 = bitcast <8 x i8> %3 to <1 x i64>
943  %5 = extractelement <1 x i64> %4, i32 0
944  ret i64 %5
945}
946
947declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
948
949define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
950; CHECK: pavgw
951entry:
952  %0 = bitcast <1 x i64> %b to <4 x i16>
953  %1 = bitcast <1 x i64> %a to <4 x i16>
954  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
955  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
956  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
957  %3 = bitcast x86_mmx %2 to <4 x i16>
958  %4 = bitcast <4 x i16> %3 to <1 x i64>
959  %5 = extractelement <1 x i64> %4, i32 0
960  ret i64 %5
961}
962
963declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
964
965define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
966; CHECK: pavgb
967entry:
968  %0 = bitcast <1 x i64> %b to <8 x i8>
969  %1 = bitcast <1 x i64> %a to <8 x i8>
970  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
971  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
972  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
973  %3 = bitcast x86_mmx %2 to <8 x i8>
974  %4 = bitcast <8 x i8> %3 to <1 x i64>
975  %5 = extractelement <1 x i64> %4, i32 0
976  ret i64 %5
977}
978
979declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
980
981define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
982; CHECK: movntq
983entry:
984  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
985  %0 = extractelement <1 x i64> %a, i32 0
986  %mmx_var.i = bitcast i64 %0 to x86_mmx
987  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
988  ret void
989}
990
991declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
992
993define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
994; CHECK: pmovmskb
995entry:
996  %0 = bitcast <1 x i64> %a to <8 x i8>
997  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
998  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
999  ret i32 %1
1000}
1001
1002declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
1003
1004define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
1005; CHECK: maskmovq
1006entry:
1007  %0 = bitcast <1 x i64> %n to <8 x i8>
1008  %1 = bitcast <1 x i64> %d to <8 x i8>
1009  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1010  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1011  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
1012  ret void
1013}
1014
1015declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
1016
1017define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1018; CHECK: pmulhuw
1019entry:
1020  %0 = bitcast <1 x i64> %b to <4 x i16>
1021  %1 = bitcast <1 x i64> %a to <4 x i16>
1022  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1023  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
1024  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1025  %3 = bitcast x86_mmx %2 to <4 x i16>
1026  %4 = bitcast <4 x i16> %3 to <1 x i64>
1027  %5 = extractelement <1 x i64> %4, i32 0
1028  ret i64 %5
1029}
1030
1031declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
1032
1033define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
1034; CHECK: pshufw
1035entry:
1036  %0 = bitcast <1 x i64> %a to <4 x i16>
1037  %1 = bitcast <4 x i16> %0 to x86_mmx
1038  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
1039  %3 = bitcast x86_mmx %2 to <4 x i16>
1040  %4 = bitcast <4 x i16> %3 to <1 x i64>
1041  %5 = extractelement <1 x i64> %4, i32 0
1042  ret i64 %5
1043}
1044
1045declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
1046
1047define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1048; CHECK: pmuludq
1049entry:
1050  %0 = bitcast <1 x i64> %b to <2 x i32>
1051  %1 = bitcast <1 x i64> %a to <2 x i32>
1052  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
1053  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
1054  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1055  %3 = bitcast x86_mmx %2 to i64
1056  ret i64 %3
1057}
1058
1059declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
1060
1061define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
1062; CHECK: cvtpi2pd
1063entry:
1064  %0 = bitcast <1 x i64> %a to <2 x i32>
1065  %1 = bitcast <2 x i32> %0 to x86_mmx
1066  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
1067  ret <2 x double> %2
1068}
1069
1070declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
1071
1072define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
1073; CHECK: cvttpd2pi
1074entry:
1075  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
1076  %1 = bitcast x86_mmx %0 to <2 x i32>
1077  %2 = bitcast <2 x i32> %1 to <1 x i64>
1078  %3 = extractelement <1 x i64> %2, i32 0
1079  ret i64 %3
1080}
1081
1082declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
1083
1084define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
1085; CHECK: cvtpd2pi
1086entry:
1087  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
1088  %1 = bitcast x86_mmx %0 to <2 x i32>
1089  %2 = bitcast <2 x i32> %1 to <1 x i64>
1090  %3 = extractelement <1 x i64> %2, i32 0
1091  ret i64 %3
1092}
1093
1094declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
1095
1096define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1097; CHECK: palignr
1098entry:
1099  %0 = extractelement <1 x i64> %a, i32 0
1100  %mmx_var = bitcast i64 %0 to x86_mmx
1101  %1 = extractelement <1 x i64> %b, i32 0
1102  %mmx_var1 = bitcast i64 %1 to x86_mmx
1103  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
1104  %3 = bitcast x86_mmx %2 to i64
1105  ret i64 %3
1106}
1107
1108declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
1109
1110define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
1111; CHECK: pabsd
1112entry:
1113  %0 = bitcast <1 x i64> %a to <2 x i32>
1114  %1 = bitcast <2 x i32> %0 to x86_mmx
1115  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
1116  %3 = bitcast x86_mmx %2 to <2 x i32>
1117  %4 = bitcast <2 x i32> %3 to <1 x i64>
1118  %5 = extractelement <1 x i64> %4, i32 0
1119  ret i64 %5
1120}
1121
1122declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
1123
1124define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
1125; CHECK: pabsw
1126entry:
1127  %0 = bitcast <1 x i64> %a to <4 x i16>
1128  %1 = bitcast <4 x i16> %0 to x86_mmx
1129  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
1130  %3 = bitcast x86_mmx %2 to <4 x i16>
1131  %4 = bitcast <4 x i16> %3 to <1 x i64>
1132  %5 = extractelement <1 x i64> %4, i32 0
1133  ret i64 %5
1134}
1135
1136declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
1137
1138define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
1139; CHECK: pabsb
1140entry:
1141  %0 = bitcast <1 x i64> %a to <8 x i8>
1142  %1 = bitcast <8 x i8> %0 to x86_mmx
1143  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
1144  %3 = bitcast x86_mmx %2 to <8 x i8>
1145  %4 = bitcast <8 x i8> %3 to <1 x i64>
1146  %5 = extractelement <1 x i64> %4, i32 0
1147  ret i64 %5
1148}
1149
1150declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
1151
1152define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1153; CHECK: psignd
1154entry:
1155  %0 = bitcast <1 x i64> %b to <2 x i32>
1156  %1 = bitcast <1 x i64> %a to <2 x i32>
1157  %2 = bitcast <2 x i32> %1 to x86_mmx
1158  %3 = bitcast <2 x i32> %0 to x86_mmx
1159  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
1160  %5 = bitcast x86_mmx %4 to <2 x i32>
1161  %6 = bitcast <2 x i32> %5 to <1 x i64>
1162  %7 = extractelement <1 x i64> %6, i32 0
1163  ret i64 %7
1164}
1165
1166declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
1167
1168define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1169; CHECK: psignw
1170entry:
1171  %0 = bitcast <1 x i64> %b to <4 x i16>
1172  %1 = bitcast <1 x i64> %a to <4 x i16>
1173  %2 = bitcast <4 x i16> %1 to x86_mmx
1174  %3 = bitcast <4 x i16> %0 to x86_mmx
1175  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
1176  %5 = bitcast x86_mmx %4 to <4 x i16>
1177  %6 = bitcast <4 x i16> %5 to <1 x i64>
1178  %7 = extractelement <1 x i64> %6, i32 0
1179  ret i64 %7
1180}
1181
1182declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
1183
1184define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1185; CHECK: psignb
1186entry:
1187  %0 = bitcast <1 x i64> %b to <8 x i8>
1188  %1 = bitcast <1 x i64> %a to <8 x i8>
1189  %2 = bitcast <8 x i8> %1 to x86_mmx
1190  %3 = bitcast <8 x i8> %0 to x86_mmx
1191  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
1192  %5 = bitcast x86_mmx %4 to <8 x i8>
1193  %6 = bitcast <8 x i8> %5 to <1 x i64>
1194  %7 = extractelement <1 x i64> %6, i32 0
1195  ret i64 %7
1196}
1197
1198declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
1199
1200define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1201; CHECK: pshufb
1202entry:
1203  %0 = bitcast <1 x i64> %b to <8 x i8>
1204  %1 = bitcast <1 x i64> %a to <8 x i8>
1205  %2 = bitcast <8 x i8> %1 to x86_mmx
1206  %3 = bitcast <8 x i8> %0 to x86_mmx
1207  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
1208  %5 = bitcast x86_mmx %4 to <8 x i8>
1209  %6 = bitcast <8 x i8> %5 to <1 x i64>
1210  %7 = extractelement <1 x i64> %6, i32 0
1211  ret i64 %7
1212}
1213
1214declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
1215
1216define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1217; CHECK: pmulhrsw
1218entry:
1219  %0 = bitcast <1 x i64> %b to <4 x i16>
1220  %1 = bitcast <1 x i64> %a to <4 x i16>
1221  %2 = bitcast <4 x i16> %1 to x86_mmx
1222  %3 = bitcast <4 x i16> %0 to x86_mmx
1223  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1224  %5 = bitcast x86_mmx %4 to <4 x i16>
1225  %6 = bitcast <4 x i16> %5 to <1 x i64>
1226  %7 = extractelement <1 x i64> %6, i32 0
1227  ret i64 %7
1228}
1229
1230declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
1231
1232define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1233; CHECK: pmaddubsw
1234entry:
1235  %0 = bitcast <1 x i64> %b to <8 x i8>
1236  %1 = bitcast <1 x i64> %a to <8 x i8>
1237  %2 = bitcast <8 x i8> %1 to x86_mmx
1238  %3 = bitcast <8 x i8> %0 to x86_mmx
1239  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1240  %5 = bitcast x86_mmx %4 to <8 x i8>
1241  %6 = bitcast <8 x i8> %5 to <1 x i64>
1242  %7 = extractelement <1 x i64> %6, i32 0
1243  ret i64 %7
1244}
1245
1246declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
1247
1248define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1249; CHECK: phsubsw
1250entry:
1251  %0 = bitcast <1 x i64> %b to <4 x i16>
1252  %1 = bitcast <1 x i64> %a to <4 x i16>
1253  %2 = bitcast <4 x i16> %1 to x86_mmx
1254  %3 = bitcast <4 x i16> %0 to x86_mmx
1255  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1256  %5 = bitcast x86_mmx %4 to <4 x i16>
1257  %6 = bitcast <4 x i16> %5 to <1 x i64>
1258  %7 = extractelement <1 x i64> %6, i32 0
1259  ret i64 %7
1260}
1261
1262declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
1263
1264define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1265; CHECK: phsubd
1266entry:
1267  %0 = bitcast <1 x i64> %b to <2 x i32>
1268  %1 = bitcast <1 x i64> %a to <2 x i32>
1269  %2 = bitcast <2 x i32> %1 to x86_mmx
1270  %3 = bitcast <2 x i32> %0 to x86_mmx
1271  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
1272  %5 = bitcast x86_mmx %4 to <2 x i32>
1273  %6 = bitcast <2 x i32> %5 to <1 x i64>
1274  %7 = extractelement <1 x i64> %6, i32 0
1275  ret i64 %7
1276}
1277
1278declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
1279
1280define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1281; CHECK: phsubw
1282entry:
1283  %0 = bitcast <1 x i64> %b to <4 x i16>
1284  %1 = bitcast <1 x i64> %a to <4 x i16>
1285  %2 = bitcast <4 x i16> %1 to x86_mmx
1286  %3 = bitcast <4 x i16> %0 to x86_mmx
1287  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
1288  %5 = bitcast x86_mmx %4 to <4 x i16>
1289  %6 = bitcast <4 x i16> %5 to <1 x i64>
1290  %7 = extractelement <1 x i64> %6, i32 0
1291  ret i64 %7
1292}
1293
1294declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
1295
1296define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1297; CHECK: phaddsw
1298entry:
1299  %0 = bitcast <1 x i64> %b to <4 x i16>
1300  %1 = bitcast <1 x i64> %a to <4 x i16>
1301  %2 = bitcast <4 x i16> %1 to x86_mmx
1302  %3 = bitcast <4 x i16> %0 to x86_mmx
1303  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1304  %5 = bitcast x86_mmx %4 to <4 x i16>
1305  %6 = bitcast <4 x i16> %5 to <1 x i64>
1306  %7 = extractelement <1 x i64> %6, i32 0
1307  ret i64 %7
1308}
1309
1310declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
1311
1312define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1313; CHECK: phaddd
1314entry:
1315  %0 = bitcast <1 x i64> %b to <2 x i32>
1316  %1 = bitcast <1 x i64> %a to <2 x i32>
1317  %2 = bitcast <2 x i32> %1 to x86_mmx
1318  %3 = bitcast <2 x i32> %0 to x86_mmx
1319  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
1320  %5 = bitcast x86_mmx %4 to <2 x i32>
1321  %6 = bitcast <2 x i32> %5 to <1 x i64>
1322  %7 = extractelement <1 x i64> %6, i32 0
1323  ret i64 %7
1324}
1325