1; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
2; RUN: llc < %s -march=x86 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
3; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
4; RUN: llc < %s -march=x86-64 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
5
6declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
7
8define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
9; ALL-LABEL: @test1
10; ALL: phaddw
11entry:
12  %0 = bitcast <1 x i64> %b to <4 x i16>
13  %1 = bitcast <1 x i64> %a to <4 x i16>
14  %2 = bitcast <4 x i16> %1 to x86_mmx
15  %3 = bitcast <4 x i16> %0 to x86_mmx
16  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
17  %5 = bitcast x86_mmx %4 to <4 x i16>
18  %6 = bitcast <4 x i16> %5 to <1 x i64>
19  %7 = extractelement <1 x i64> %6, i32 0
20  ret i64 %7
21}
22
23declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
24
25define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
26; ALL-LABEL: @test88
27; ALL: pcmpgtd
28entry:
29  %0 = bitcast <1 x i64> %b to <2 x i32>
30  %1 = bitcast <1 x i64> %a to <2 x i32>
31  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
32  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
33  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
34  %3 = bitcast x86_mmx %2 to <2 x i32>
35  %4 = bitcast <2 x i32> %3 to <1 x i64>
36  %5 = extractelement <1 x i64> %4, i32 0
37  ret i64 %5
38}
39
40declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
41
42define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
43; ALL-LABEL: @test87
44; ALL: pcmpgtw
45entry:
46  %0 = bitcast <1 x i64> %b to <4 x i16>
47  %1 = bitcast <1 x i64> %a to <4 x i16>
48  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
49  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
50  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
51  %3 = bitcast x86_mmx %2 to <4 x i16>
52  %4 = bitcast <4 x i16> %3 to <1 x i64>
53  %5 = extractelement <1 x i64> %4, i32 0
54  ret i64 %5
55}
56
57declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
58
59define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
60; ALL-LABEL: @test86
61; ALL: pcmpgtb
62entry:
63  %0 = bitcast <1 x i64> %b to <8 x i8>
64  %1 = bitcast <1 x i64> %a to <8 x i8>
65  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
66  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
67  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
68  %3 = bitcast x86_mmx %2 to <8 x i8>
69  %4 = bitcast <8 x i8> %3 to <1 x i64>
70  %5 = extractelement <1 x i64> %4, i32 0
71  ret i64 %5
72}
73
74declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
75
76define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
77; ALL-LABEL: @test85
78; ALL: pcmpeqd
79entry:
80  %0 = bitcast <1 x i64> %b to <2 x i32>
81  %1 = bitcast <1 x i64> %a to <2 x i32>
82  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
83  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
84  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
85  %3 = bitcast x86_mmx %2 to <2 x i32>
86  %4 = bitcast <2 x i32> %3 to <1 x i64>
87  %5 = extractelement <1 x i64> %4, i32 0
88  ret i64 %5
89}
90
91declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
92
93define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
94; ALL-LABEL: @test84
95; ALL: pcmpeqw
96entry:
97  %0 = bitcast <1 x i64> %b to <4 x i16>
98  %1 = bitcast <1 x i64> %a to <4 x i16>
99  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
100  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
101  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
102  %3 = bitcast x86_mmx %2 to <4 x i16>
103  %4 = bitcast <4 x i16> %3 to <1 x i64>
104  %5 = extractelement <1 x i64> %4, i32 0
105  ret i64 %5
106}
107
108declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
109
110define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
111; ALL-LABEL: @test83
112; ALL: pcmpeqb
113entry:
114  %0 = bitcast <1 x i64> %b to <8 x i8>
115  %1 = bitcast <1 x i64> %a to <8 x i8>
116  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
117  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
118  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
119  %3 = bitcast x86_mmx %2 to <8 x i8>
120  %4 = bitcast <8 x i8> %3 to <1 x i64>
121  %5 = extractelement <1 x i64> %4, i32 0
122  ret i64 %5
123}
124
125declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
126
127define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
128; ALL-LABEL: @test82
129; X86: punpckldq {{.*#+}} mm0 = mm0[0],mem[0]
130; X64: punpckldq {{.*#+}} mm0 = mm0[0],mm1[0]
131entry:
132  %0 = bitcast <1 x i64> %b to <2 x i32>
133  %1 = bitcast <1 x i64> %a to <2 x i32>
134  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
135  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
136  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
137  %3 = bitcast x86_mmx %2 to <2 x i32>
138  %4 = bitcast <2 x i32> %3 to <1 x i64>
139  %5 = extractelement <1 x i64> %4, i32 0
140  ret i64 %5
141}
142
143declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
144
145define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
146; ALL-LABEL: @test81
147; X86: punpcklwd {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1]
148; X64: punpcklwd {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
149entry:
150  %0 = bitcast <1 x i64> %b to <4 x i16>
151  %1 = bitcast <1 x i64> %a to <4 x i16>
152  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
153  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
154  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
155  %3 = bitcast x86_mmx %2 to <4 x i16>
156  %4 = bitcast <4 x i16> %3 to <1 x i64>
157  %5 = extractelement <1 x i64> %4, i32 0
158  ret i64 %5
159}
160
161declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
162
163define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
164; ALL-LABEL: @test80
165; X86: punpcklbw {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
166; X64: punpcklbw {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
167entry:
168  %0 = bitcast <1 x i64> %b to <8 x i8>
169  %1 = bitcast <1 x i64> %a to <8 x i8>
170  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
171  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
172  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
173  %3 = bitcast x86_mmx %2 to <8 x i8>
174  %4 = bitcast <8 x i8> %3 to <1 x i64>
175  %5 = extractelement <1 x i64> %4, i32 0
176  ret i64 %5
177}
178
179declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
180
181define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
182; ALL-LABEL: @test79
183; X86: punpckhdq {{.*#+}} mm0 = mm0[1],mem[1]
184; X64: punpckhdq {{.*#+}} mm0 = mm0[1],mm1[1]
185entry:
186  %0 = bitcast <1 x i64> %b to <2 x i32>
187  %1 = bitcast <1 x i64> %a to <2 x i32>
188  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
189  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
190  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
191  %3 = bitcast x86_mmx %2 to <2 x i32>
192  %4 = bitcast <2 x i32> %3 to <1 x i64>
193  %5 = extractelement <1 x i64> %4, i32 0
194  ret i64 %5
195}
196
197declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
198
199define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
200; ALL-LABEL: @test78
201; X86: punpckhwd {{.*#+}} mm0 = mm0[2],mem[2],mm0[3],mem[3]
202; X64: punpckhwd {{.*#+}} mm0 = mm0[2],mm1[2],mm0[3],mm1[3]
203entry:
204  %0 = bitcast <1 x i64> %b to <4 x i16>
205  %1 = bitcast <1 x i64> %a to <4 x i16>
206  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
207  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
208  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
209  %3 = bitcast x86_mmx %2 to <4 x i16>
210  %4 = bitcast <4 x i16> %3 to <1 x i64>
211  %5 = extractelement <1 x i64> %4, i32 0
212  ret i64 %5
213}
214
215declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
216
217define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
218; ALL-LABEL: @test77
219; X86: punpckhbw {{.*#+}} mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
220; X64: punpckhbw {{.*#+}} mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7]
221entry:
222  %0 = bitcast <1 x i64> %b to <8 x i8>
223  %1 = bitcast <1 x i64> %a to <8 x i8>
224  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
225  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
226  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
227  %3 = bitcast x86_mmx %2 to <8 x i8>
228  %4 = bitcast <8 x i8> %3 to <1 x i64>
229  %5 = extractelement <1 x i64> %4, i32 0
230  ret i64 %5
231}
232
233declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
234
235define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
236; ALL-LABEL: @test76
237; ALL: packuswb
238entry:
239  %0 = bitcast <1 x i64> %b to <4 x i16>
240  %1 = bitcast <1 x i64> %a to <4 x i16>
241  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
242  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
243  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
244  %3 = bitcast x86_mmx %2 to <8 x i8>
245  %4 = bitcast <8 x i8> %3 to <1 x i64>
246  %5 = extractelement <1 x i64> %4, i32 0
247  ret i64 %5
248}
249
250declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
251
252define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
253; ALL-LABEL: @test75
254; ALL: packssdw
255entry:
256  %0 = bitcast <1 x i64> %b to <2 x i32>
257  %1 = bitcast <1 x i64> %a to <2 x i32>
258  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
259  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
260  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
261  %3 = bitcast x86_mmx %2 to <4 x i16>
262  %4 = bitcast <4 x i16> %3 to <1 x i64>
263  %5 = extractelement <1 x i64> %4, i32 0
264  ret i64 %5
265}
266
267declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
268
269define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
270; ALL-LABEL: @test74
271; ALL: packsswb
272entry:
273  %0 = bitcast <1 x i64> %b to <4 x i16>
274  %1 = bitcast <1 x i64> %a to <4 x i16>
275  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
276  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
277  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
278  %3 = bitcast x86_mmx %2 to <8 x i8>
279  %4 = bitcast <8 x i8> %3 to <1 x i64>
280  %5 = extractelement <1 x i64> %4, i32 0
281  ret i64 %5
282}
283
284declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
285
286define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
287; ALL-LABEL: @test73
288; ALL: psrad
289entry:
290  %0 = bitcast <1 x i64> %a to <2 x i32>
291  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
292  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
293  %2 = bitcast x86_mmx %1 to <2 x i32>
294  %3 = bitcast <2 x i32> %2 to <1 x i64>
295  %4 = extractelement <1 x i64> %3, i32 0
296  ret i64 %4
297}
298
299declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
300
301define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
302; ALL-LABEL: @test72
303; ALL: psraw
304entry:
305  %0 = bitcast <1 x i64> %a to <4 x i16>
306  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
307  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
308  %2 = bitcast x86_mmx %1 to <4 x i16>
309  %3 = bitcast <4 x i16> %2 to <1 x i64>
310  %4 = extractelement <1 x i64> %3, i32 0
311  ret i64 %4
312}
313
314declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
315
316define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
317; ALL-LABEL: @test71
318; ALL: psrlq
319entry:
320  %0 = extractelement <1 x i64> %a, i32 0
321  %mmx_var.i = bitcast i64 %0 to x86_mmx
322  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
323  %2 = bitcast x86_mmx %1 to i64
324  ret i64 %2
325}
326
327declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
328
329define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
330; ALL-LABEL: @test70
331; ALL: psrld
332entry:
333  %0 = bitcast <1 x i64> %a to <2 x i32>
334  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
335  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
336  %2 = bitcast x86_mmx %1 to <2 x i32>
337  %3 = bitcast <2 x i32> %2 to <1 x i64>
338  %4 = extractelement <1 x i64> %3, i32 0
339  ret i64 %4
340}
341
342declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
343
344define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
345; ALL-LABEL: @test69
346; ALL: psrlw
347entry:
348  %0 = bitcast <1 x i64> %a to <4 x i16>
349  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
350  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
351  %2 = bitcast x86_mmx %1 to <4 x i16>
352  %3 = bitcast <4 x i16> %2 to <1 x i64>
353  %4 = extractelement <1 x i64> %3, i32 0
354  ret i64 %4
355}
356
357declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
358
359define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
360; ALL-LABEL: @test68
361; ALL: psllq
362entry:
363  %0 = extractelement <1 x i64> %a, i32 0
364  %mmx_var.i = bitcast i64 %0 to x86_mmx
365  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
366  %2 = bitcast x86_mmx %1 to i64
367  ret i64 %2
368}
369
370declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
371
372define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
373; ALL-LABEL: @test67
374; ALL: pslld
375entry:
376  %0 = bitcast <1 x i64> %a to <2 x i32>
377  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
378  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
379  %2 = bitcast x86_mmx %1 to <2 x i32>
380  %3 = bitcast <2 x i32> %2 to <1 x i64>
381  %4 = extractelement <1 x i64> %3, i32 0
382  ret i64 %4
383}
384
385declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
386
387define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
388; ALL-LABEL: @test66
389; ALL: psllw
390entry:
391  %0 = bitcast <1 x i64> %a to <4 x i16>
392  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
393  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
394  %2 = bitcast x86_mmx %1 to <4 x i16>
395  %3 = bitcast <4 x i16> %2 to <1 x i64>
396  %4 = extractelement <1 x i64> %3, i32 0
397  ret i64 %4
398}
399
400declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
401
402define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
403; ALL-LABEL: @test65
404; ALL: psrad
405entry:
406  %0 = bitcast <1 x i64> %a to <2 x i32>
407  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
408  %1 = extractelement <1 x i64> %b, i32 0
409  %mmx_var1.i = bitcast i64 %1 to x86_mmx
410  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
411  %3 = bitcast x86_mmx %2 to <2 x i32>
412  %4 = bitcast <2 x i32> %3 to <1 x i64>
413  %5 = extractelement <1 x i64> %4, i32 0
414  ret i64 %5
415}
416
417declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
418
419define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
420; ALL-LABEL: @test64
421; ALL: psraw
422entry:
423  %0 = bitcast <1 x i64> %a to <4 x i16>
424  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
425  %1 = extractelement <1 x i64> %b, i32 0
426  %mmx_var1.i = bitcast i64 %1 to x86_mmx
427  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
428  %3 = bitcast x86_mmx %2 to <4 x i16>
429  %4 = bitcast <4 x i16> %3 to <1 x i64>
430  %5 = extractelement <1 x i64> %4, i32 0
431  ret i64 %5
432}
433
434declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
435
436define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
437; ALL-LABEL: @test63
438; ALL: psrlq
439entry:
440  %0 = extractelement <1 x i64> %a, i32 0
441  %mmx_var.i = bitcast i64 %0 to x86_mmx
442  %1 = extractelement <1 x i64> %b, i32 0
443  %mmx_var1.i = bitcast i64 %1 to x86_mmx
444  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
445  %3 = bitcast x86_mmx %2 to i64
446  ret i64 %3
447}
448
449declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
450
451define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
452; ALL-LABEL: @test62
453; ALL: psrld
454entry:
455  %0 = bitcast <1 x i64> %a to <2 x i32>
456  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
457  %1 = extractelement <1 x i64> %b, i32 0
458  %mmx_var1.i = bitcast i64 %1 to x86_mmx
459  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
460  %3 = bitcast x86_mmx %2 to <2 x i32>
461  %4 = bitcast <2 x i32> %3 to <1 x i64>
462  %5 = extractelement <1 x i64> %4, i32 0
463  ret i64 %5
464}
465
466declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
467
468define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
469; ALL-LABEL: @test61
470; ALL: psrlw
471entry:
472  %0 = bitcast <1 x i64> %a to <4 x i16>
473  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
474  %1 = extractelement <1 x i64> %b, i32 0
475  %mmx_var1.i = bitcast i64 %1 to x86_mmx
476  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
477  %3 = bitcast x86_mmx %2 to <4 x i16>
478  %4 = bitcast <4 x i16> %3 to <1 x i64>
479  %5 = extractelement <1 x i64> %4, i32 0
480  ret i64 %5
481}
482
483declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
484
485define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
486; ALL-LABEL: @test60
487; ALL: psllq
488entry:
489  %0 = extractelement <1 x i64> %a, i32 0
490  %mmx_var.i = bitcast i64 %0 to x86_mmx
491  %1 = extractelement <1 x i64> %b, i32 0
492  %mmx_var1.i = bitcast i64 %1 to x86_mmx
493  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
494  %3 = bitcast x86_mmx %2 to i64
495  ret i64 %3
496}
497
498declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
499
500define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
501; ALL-LABEL: @test59
502; ALL: pslld
503entry:
504  %0 = bitcast <1 x i64> %a to <2 x i32>
505  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
506  %1 = extractelement <1 x i64> %b, i32 0
507  %mmx_var1.i = bitcast i64 %1 to x86_mmx
508  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
509  %3 = bitcast x86_mmx %2 to <2 x i32>
510  %4 = bitcast <2 x i32> %3 to <1 x i64>
511  %5 = extractelement <1 x i64> %4, i32 0
512  ret i64 %5
513}
514
515declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
516
517define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
518; ALL-LABEL: @test58
519; ALL: psllw
520entry:
521  %0 = bitcast <1 x i64> %a to <4 x i16>
522  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
523  %1 = extractelement <1 x i64> %b, i32 0
524  %mmx_var1.i = bitcast i64 %1 to x86_mmx
525  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
526  %3 = bitcast x86_mmx %2 to <4 x i16>
527  %4 = bitcast <4 x i16> %3 to <1 x i64>
528  %5 = extractelement <1 x i64> %4, i32 0
529  ret i64 %5
530}
531
532declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
533
534define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
535; ALL-LABEL: @test56
536; ALL: pxor
537entry:
538  %0 = bitcast <1 x i64> %b to <2 x i32>
539  %1 = bitcast <1 x i64> %a to <2 x i32>
540  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
541  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
542  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
543  %3 = bitcast x86_mmx %2 to <2 x i32>
544  %4 = bitcast <2 x i32> %3 to <1 x i64>
545  %5 = extractelement <1 x i64> %4, i32 0
546  ret i64 %5
547}
548
549declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
550
551define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
552; ALL-LABEL: @test55
553; ALL: por
554entry:
555  %0 = bitcast <1 x i64> %b to <2 x i32>
556  %1 = bitcast <1 x i64> %a to <2 x i32>
557  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
558  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
559  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
560  %3 = bitcast x86_mmx %2 to <2 x i32>
561  %4 = bitcast <2 x i32> %3 to <1 x i64>
562  %5 = extractelement <1 x i64> %4, i32 0
563  ret i64 %5
564}
565
566declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
567
568define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
569; ALL-LABEL: @test54
570; ALL: pandn
571entry:
572  %0 = bitcast <1 x i64> %b to <2 x i32>
573  %1 = bitcast <1 x i64> %a to <2 x i32>
574  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
575  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
576  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
577  %3 = bitcast x86_mmx %2 to <2 x i32>
578  %4 = bitcast <2 x i32> %3 to <1 x i64>
579  %5 = extractelement <1 x i64> %4, i32 0
580  ret i64 %5
581}
582
583declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
584
585define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
586; ALL-LABEL: @test53
587; ALL: pand
588entry:
589  %0 = bitcast <1 x i64> %b to <2 x i32>
590  %1 = bitcast <1 x i64> %a to <2 x i32>
591  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
592  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
593  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
594  %3 = bitcast x86_mmx %2 to <2 x i32>
595  %4 = bitcast <2 x i32> %3 to <1 x i64>
596  %5 = extractelement <1 x i64> %4, i32 0
597  ret i64 %5
598}
599
600declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
601
602define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
603; ALL-LABEL: @test52
604; ALL: pmullw
605entry:
606  %0 = bitcast <1 x i64> %b to <4 x i16>
607  %1 = bitcast <1 x i64> %a to <4 x i16>
608  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
609  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
610  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
611  %3 = bitcast x86_mmx %2 to <4 x i16>
612  %4 = bitcast <4 x i16> %3 to <1 x i64>
613  %5 = extractelement <1 x i64> %4, i32 0
614  ret i64 %5
615}
616
617define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
618; ALL-LABEL: @test51
619; ALL: pmullw
620entry:
621  %0 = bitcast <1 x i64> %b to <4 x i16>
622  %1 = bitcast <1 x i64> %a to <4 x i16>
623  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
624  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
625  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
626  %3 = bitcast x86_mmx %2 to <4 x i16>
627  %4 = bitcast <4 x i16> %3 to <1 x i64>
628  %5 = extractelement <1 x i64> %4, i32 0
629  ret i64 %5
630}
631
632declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
633
634define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
635; ALL-LABEL: @test50
636; ALL: pmulhw
637entry:
638  %0 = bitcast <1 x i64> %b to <4 x i16>
639  %1 = bitcast <1 x i64> %a to <4 x i16>
640  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
641  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
642  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
643  %3 = bitcast x86_mmx %2 to <4 x i16>
644  %4 = bitcast <4 x i16> %3 to <1 x i64>
645  %5 = extractelement <1 x i64> %4, i32 0
646  ret i64 %5
647}
648
649declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
650
651define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
652; ALL-LABEL: @test49
653; ALL: pmaddwd
654entry:
655  %0 = bitcast <1 x i64> %b to <4 x i16>
656  %1 = bitcast <1 x i64> %a to <4 x i16>
657  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
658  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
659  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
660  %3 = bitcast x86_mmx %2 to <2 x i32>
661  %4 = bitcast <2 x i32> %3 to <1 x i64>
662  %5 = extractelement <1 x i64> %4, i32 0
663  ret i64 %5
664}
665
666declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
667
668define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
669; ALL-LABEL: @test48
670; ALL: psubusw
671entry:
672  %0 = bitcast <1 x i64> %b to <4 x i16>
673  %1 = bitcast <1 x i64> %a to <4 x i16>
674  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
675  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
676  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
677  %3 = bitcast x86_mmx %2 to <4 x i16>
678  %4 = bitcast <4 x i16> %3 to <1 x i64>
679  %5 = extractelement <1 x i64> %4, i32 0
680  ret i64 %5
681}
682
683declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
684
685define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
686; ALL-LABEL: @test47
687; ALL: psubusb
688entry:
689  %0 = bitcast <1 x i64> %b to <8 x i8>
690  %1 = bitcast <1 x i64> %a to <8 x i8>
691  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
692  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
693  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
694  %3 = bitcast x86_mmx %2 to <8 x i8>
695  %4 = bitcast <8 x i8> %3 to <1 x i64>
696  %5 = extractelement <1 x i64> %4, i32 0
697  ret i64 %5
698}
699
700declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
701
702define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
703; ALL-LABEL: @test46
704; ALL: psubsw
705entry:
706  %0 = bitcast <1 x i64> %b to <4 x i16>
707  %1 = bitcast <1 x i64> %a to <4 x i16>
708  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
709  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
710  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
711  %3 = bitcast x86_mmx %2 to <4 x i16>
712  %4 = bitcast <4 x i16> %3 to <1 x i64>
713  %5 = extractelement <1 x i64> %4, i32 0
714  ret i64 %5
715}
716
717declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
718
719define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
720; ALL-LABEL: @test45
721; ALL: psubsb
722entry:
723  %0 = bitcast <1 x i64> %b to <8 x i8>
724  %1 = bitcast <1 x i64> %a to <8 x i8>
725  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
726  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
727  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
728  %3 = bitcast x86_mmx %2 to <8 x i8>
729  %4 = bitcast <8 x i8> %3 to <1 x i64>
730  %5 = extractelement <1 x i64> %4, i32 0
731  ret i64 %5
732}
733
734define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
735; ALL-LABEL: @test44
736; ALL: psubq
737entry:
738  %0 = extractelement <1 x i64> %a, i32 0
739  %mmx_var = bitcast i64 %0 to x86_mmx
740  %1 = extractelement <1 x i64> %b, i32 0
741  %mmx_var1 = bitcast i64 %1 to x86_mmx
742  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
743  %3 = bitcast x86_mmx %2 to i64
744  ret i64 %3
745}
746
747declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
748
749declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
750
751define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
752; ALL-LABEL: @test43
753; ALL: psubd
754entry:
755  %0 = bitcast <1 x i64> %b to <2 x i32>
756  %1 = bitcast <1 x i64> %a to <2 x i32>
757  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
758  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
759  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
760  %3 = bitcast x86_mmx %2 to <2 x i32>
761  %4 = bitcast <2 x i32> %3 to <1 x i64>
762  %5 = extractelement <1 x i64> %4, i32 0
763  ret i64 %5
764}
765
766declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
767
768define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
769; ALL-LABEL: @test42
770; ALL: psubw
771entry:
772  %0 = bitcast <1 x i64> %b to <4 x i16>
773  %1 = bitcast <1 x i64> %a to <4 x i16>
774  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
775  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
776  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
777  %3 = bitcast x86_mmx %2 to <4 x i16>
778  %4 = bitcast <4 x i16> %3 to <1 x i64>
779  %5 = extractelement <1 x i64> %4, i32 0
780  ret i64 %5
781}
782
783declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
784
785define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
786; ALL-LABEL: @test41
787; ALL: psubb
788entry:
789  %0 = bitcast <1 x i64> %b to <8 x i8>
790  %1 = bitcast <1 x i64> %a to <8 x i8>
791  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
792  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
793  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
794  %3 = bitcast x86_mmx %2 to <8 x i8>
795  %4 = bitcast <8 x i8> %3 to <1 x i64>
796  %5 = extractelement <1 x i64> %4, i32 0
797  ret i64 %5
798}
799
800declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
801
802define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
803; ALL-LABEL: @test40
804; ALL: paddusw
805entry:
806  %0 = bitcast <1 x i64> %b to <4 x i16>
807  %1 = bitcast <1 x i64> %a to <4 x i16>
808  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
809  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
810  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
811  %3 = bitcast x86_mmx %2 to <4 x i16>
812  %4 = bitcast <4 x i16> %3 to <1 x i64>
813  %5 = extractelement <1 x i64> %4, i32 0
814  ret i64 %5
815}
816
817declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
818
819define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
820; ALL-LABEL: @test39
821; ALL: paddusb
822entry:
823  %0 = bitcast <1 x i64> %b to <8 x i8>
824  %1 = bitcast <1 x i64> %a to <8 x i8>
825  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
826  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
827  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
828  %3 = bitcast x86_mmx %2 to <8 x i8>
829  %4 = bitcast <8 x i8> %3 to <1 x i64>
830  %5 = extractelement <1 x i64> %4, i32 0
831  ret i64 %5
832}
833
834declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
835
836define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
837; ALL-LABEL: @test38
838; ALL: paddsw
839entry:
840  %0 = bitcast <1 x i64> %b to <4 x i16>
841  %1 = bitcast <1 x i64> %a to <4 x i16>
842  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
843  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
844  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
845  %3 = bitcast x86_mmx %2 to <4 x i16>
846  %4 = bitcast <4 x i16> %3 to <1 x i64>
847  %5 = extractelement <1 x i64> %4, i32 0
848  ret i64 %5
849}
850
851declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
852
853define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
854; ALL-LABEL: @test37
855; ALL: paddsb
856entry:
857  %0 = bitcast <1 x i64> %b to <8 x i8>
858  %1 = bitcast <1 x i64> %a to <8 x i8>
859  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
860  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
861  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
862  %3 = bitcast x86_mmx %2 to <8 x i8>
863  %4 = bitcast <8 x i8> %3 to <1 x i64>
864  %5 = extractelement <1 x i64> %4, i32 0
865  ret i64 %5
866}
867
868declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
869
870define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
871; ALL-LABEL: @test36
872; ALL: paddq
873entry:
874  %0 = extractelement <1 x i64> %a, i32 0
875  %mmx_var = bitcast i64 %0 to x86_mmx
876  %1 = extractelement <1 x i64> %b, i32 0
877  %mmx_var1 = bitcast i64 %1 to x86_mmx
878  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
879  %3 = bitcast x86_mmx %2 to i64
880  ret i64 %3
881}
882
883declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
884
885define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
886; ALL-LABEL: @test35
887; ALL: paddd
888entry:
889  %0 = bitcast <1 x i64> %b to <2 x i32>
890  %1 = bitcast <1 x i64> %a to <2 x i32>
891  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
892  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
893  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
894  %3 = bitcast x86_mmx %2 to <2 x i32>
895  %4 = bitcast <2 x i32> %3 to <1 x i64>
896  %5 = extractelement <1 x i64> %4, i32 0
897  ret i64 %5
898}
899
900declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
901
902define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
903; ALL-LABEL: @test34
904; ALL: paddw
905entry:
906  %0 = bitcast <1 x i64> %b to <4 x i16>
907  %1 = bitcast <1 x i64> %a to <4 x i16>
908  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
909  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
910  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
911  %3 = bitcast x86_mmx %2 to <4 x i16>
912  %4 = bitcast <4 x i16> %3 to <1 x i64>
913  %5 = extractelement <1 x i64> %4, i32 0
914  ret i64 %5
915}
916
917declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
918
919define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
920; ALL-LABEL: @test33
921; ALL: paddb
922entry:
923  %0 = bitcast <1 x i64> %b to <8 x i8>
924  %1 = bitcast <1 x i64> %a to <8 x i8>
925  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
926  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
927  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
928  %3 = bitcast x86_mmx %2 to <8 x i8>
929  %4 = bitcast <8 x i8> %3 to <1 x i64>
930  %5 = extractelement <1 x i64> %4, i32 0
931  ret i64 %5
932}
933
934declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
935
936define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
937; ALL-LABEL: @test32
938; ALL: psadbw
939entry:
940  %0 = bitcast <1 x i64> %b to <8 x i8>
941  %1 = bitcast <1 x i64> %a to <8 x i8>
942  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
943  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
944  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
945  %3 = bitcast x86_mmx %2 to i64
946  ret i64 %3
947}
948
949declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
950
951define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
952; ALL-LABEL: @test31
953; ALL: pminsw
954entry:
955  %0 = bitcast <1 x i64> %b to <4 x i16>
956  %1 = bitcast <1 x i64> %a to <4 x i16>
957  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
958  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
959  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
960  %3 = bitcast x86_mmx %2 to <4 x i16>
961  %4 = bitcast <4 x i16> %3 to <1 x i64>
962  %5 = extractelement <1 x i64> %4, i32 0
963  ret i64 %5
964}
965
966declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
967
968define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
969; ALL-LABEL: @test30
970; ALL: pminub
971entry:
972  %0 = bitcast <1 x i64> %b to <8 x i8>
973  %1 = bitcast <1 x i64> %a to <8 x i8>
974  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
975  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
976  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
977  %3 = bitcast x86_mmx %2 to <8 x i8>
978  %4 = bitcast <8 x i8> %3 to <1 x i64>
979  %5 = extractelement <1 x i64> %4, i32 0
980  ret i64 %5
981}
982
983declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
984
985define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
986; ALL-LABEL: @test29
987; ALL: pmaxsw
988entry:
989  %0 = bitcast <1 x i64> %b to <4 x i16>
990  %1 = bitcast <1 x i64> %a to <4 x i16>
991  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
992  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
993  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
994  %3 = bitcast x86_mmx %2 to <4 x i16>
995  %4 = bitcast <4 x i16> %3 to <1 x i64>
996  %5 = extractelement <1 x i64> %4, i32 0
997  ret i64 %5
998}
999
1000declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
1001
1002define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1003; ALL-LABEL: @test28
1004; ALL: pmaxub
1005entry:
1006  %0 = bitcast <1 x i64> %b to <8 x i8>
1007  %1 = bitcast <1 x i64> %a to <8 x i8>
1008  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1009  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1010  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1011  %3 = bitcast x86_mmx %2 to <8 x i8>
1012  %4 = bitcast <8 x i8> %3 to <1 x i64>
1013  %5 = extractelement <1 x i64> %4, i32 0
1014  ret i64 %5
1015}
1016
1017declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
1018
1019define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1020; ALL-LABEL: @test27
1021; ALL: pavgw
1022entry:
1023  %0 = bitcast <1 x i64> %b to <4 x i16>
1024  %1 = bitcast <1 x i64> %a to <4 x i16>
1025  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1026  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
1027  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1028  %3 = bitcast x86_mmx %2 to <4 x i16>
1029  %4 = bitcast <4 x i16> %3 to <1 x i64>
1030  %5 = extractelement <1 x i64> %4, i32 0
1031  ret i64 %5
1032}
1033
1034declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
1035
1036define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1037; ALL-LABEL: @test26
1038; ALL: pavgb
1039entry:
1040  %0 = bitcast <1 x i64> %b to <8 x i8>
1041  %1 = bitcast <1 x i64> %a to <8 x i8>
1042  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1043  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1044  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1045  %3 = bitcast x86_mmx %2 to <8 x i8>
1046  %4 = bitcast <8 x i8> %3 to <1 x i64>
1047  %5 = extractelement <1 x i64> %4, i32 0
1048  ret i64 %5
1049}
1050
1051declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
1052
1053define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
1054; ALL-LABEL: @test25
1055; ALL: movntq
1056entry:
1057  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
1058  %0 = extractelement <1 x i64> %a, i32 0
1059  %mmx_var.i = bitcast i64 %0 to x86_mmx
1060  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
1061  ret void
1062}
1063
1064declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
1065
1066define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
1067; ALL-LABEL: @test24
1068; ALL: pmovmskb
1069entry:
1070  %0 = bitcast <1 x i64> %a to <8 x i8>
1071  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
1072  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
1073  ret i32 %1
1074}
1075
1076declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
1077
1078define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
1079; ALL-LABEL: @test23
1080; ALL: maskmovq
1081entry:
1082  %0 = bitcast <1 x i64> %n to <8 x i8>
1083  %1 = bitcast <1 x i64> %d to <8 x i8>
1084  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
1085  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
1086  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
1087  ret void
1088}
1089
1090declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
1091
1092define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1093; ALL-LABEL: @test22
1094; ALL: pmulhuw
1095entry:
1096  %0 = bitcast <1 x i64> %b to <4 x i16>
1097  %1 = bitcast <1 x i64> %a to <4 x i16>
1098  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
1099  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
1100  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1101  %3 = bitcast x86_mmx %2 to <4 x i16>
1102  %4 = bitcast <4 x i16> %3 to <1 x i64>
1103  %5 = extractelement <1 x i64> %4, i32 0
1104  ret i64 %5
1105}
1106
1107declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
1108
1109define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
1110; ALL-LABEL: @test21
1111; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
1112; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
1113entry:
1114  %0 = bitcast <1 x i64> %a to <4 x i16>
1115  %1 = bitcast <4 x i16> %0 to x86_mmx
1116  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
1117  %3 = bitcast x86_mmx %2 to <4 x i16>
1118  %4 = bitcast <4 x i16> %3 to <1 x i64>
1119  %5 = extractelement <1 x i64> %4, i32 0
1120  ret i64 %5
1121}
1122
1123define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
1124; ALL-LABEL: @test21_2
1125; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
1126; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
1127; ALL: movd
1128entry:
1129  %0 = bitcast <1 x i64> %a to <4 x i16>
1130  %1 = bitcast <4 x i16> %0 to x86_mmx
1131  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
1132  %3 = bitcast x86_mmx %2 to <4 x i16>
1133  %4 = bitcast <4 x i16> %3 to <2 x i32>
1134  %5 = extractelement <2 x i32> %4, i32 0
1135  ret i32 %5
1136}
1137
1138declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
1139
1140define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1141; ALL-LABEL: @test20
1142; ALL: pmuludq
1143entry:
1144  %0 = bitcast <1 x i64> %b to <2 x i32>
1145  %1 = bitcast <1 x i64> %a to <2 x i32>
1146  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
1147  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
1148  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
1149  %3 = bitcast x86_mmx %2 to i64
1150  ret i64 %3
1151}
1152
1153declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
1154
1155define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
1156; ALL-LABEL: @test19
1157; ALL: cvtpi2pd
1158entry:
1159  %0 = bitcast <1 x i64> %a to <2 x i32>
1160  %1 = bitcast <2 x i32> %0 to x86_mmx
1161  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
1162  ret <2 x double> %2
1163}
1164
1165declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
1166
1167define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
1168; ALL-LABEL: @test18
1169; ALL: cvttpd2pi
1170entry:
1171  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
1172  %1 = bitcast x86_mmx %0 to <2 x i32>
1173  %2 = bitcast <2 x i32> %1 to <1 x i64>
1174  %3 = extractelement <1 x i64> %2, i32 0
1175  ret i64 %3
1176}
1177
1178declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
1179
1180define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
1181; ALL-LABEL: @test17
1182; ALL: cvtpd2pi
1183entry:
1184  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
1185  %1 = bitcast x86_mmx %0 to <2 x i32>
1186  %2 = bitcast <2 x i32> %1 to <1 x i64>
1187  %3 = extractelement <1 x i64> %2, i32 0
1188  ret i64 %3
1189}
1190
1191declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
1192
1193define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1194; ALL-LABEL: @test16
1195; ALL: palignr
1196entry:
1197  %0 = extractelement <1 x i64> %a, i32 0
1198  %mmx_var = bitcast i64 %0 to x86_mmx
1199  %1 = extractelement <1 x i64> %b, i32 0
1200  %mmx_var1 = bitcast i64 %1 to x86_mmx
1201  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
1202  %3 = bitcast x86_mmx %2 to i64
1203  ret i64 %3
1204}
1205
1206declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
1207
1208define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
1209; ALL-LABEL: @test15
1210; ALL: pabsd
1211entry:
1212  %0 = bitcast <1 x i64> %a to <2 x i32>
1213  %1 = bitcast <2 x i32> %0 to x86_mmx
1214  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
1215  %3 = bitcast x86_mmx %2 to <2 x i32>
1216  %4 = bitcast <2 x i32> %3 to <1 x i64>
1217  %5 = extractelement <1 x i64> %4, i32 0
1218  ret i64 %5
1219}
1220
1221declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
1222
1223define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
1224; ALL-LABEL: @test14
1225; ALL: pabsw
1226entry:
1227  %0 = bitcast <1 x i64> %a to <4 x i16>
1228  %1 = bitcast <4 x i16> %0 to x86_mmx
1229  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
1230  %3 = bitcast x86_mmx %2 to <4 x i16>
1231  %4 = bitcast <4 x i16> %3 to <1 x i64>
1232  %5 = extractelement <1 x i64> %4, i32 0
1233  ret i64 %5
1234}
1235
1236declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
1237
1238define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
1239; ALL-LABEL: @test13
1240; ALL: pabsb
1241entry:
1242  %0 = bitcast <1 x i64> %a to <8 x i8>
1243  %1 = bitcast <8 x i8> %0 to x86_mmx
1244  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
1245  %3 = bitcast x86_mmx %2 to <8 x i8>
1246  %4 = bitcast <8 x i8> %3 to <1 x i64>
1247  %5 = extractelement <1 x i64> %4, i32 0
1248  ret i64 %5
1249}
1250
1251declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
1252
1253define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1254; ALL-LABEL: @test12
1255; ALL: psignd
1256entry:
1257  %0 = bitcast <1 x i64> %b to <2 x i32>
1258  %1 = bitcast <1 x i64> %a to <2 x i32>
1259  %2 = bitcast <2 x i32> %1 to x86_mmx
1260  %3 = bitcast <2 x i32> %0 to x86_mmx
1261  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
1262  %5 = bitcast x86_mmx %4 to <2 x i32>
1263  %6 = bitcast <2 x i32> %5 to <1 x i64>
1264  %7 = extractelement <1 x i64> %6, i32 0
1265  ret i64 %7
1266}
1267
1268declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
1269
1270define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1271; ALL-LABEL: @test11
1272; ALL: psignw
1273entry:
1274  %0 = bitcast <1 x i64> %b to <4 x i16>
1275  %1 = bitcast <1 x i64> %a to <4 x i16>
1276  %2 = bitcast <4 x i16> %1 to x86_mmx
1277  %3 = bitcast <4 x i16> %0 to x86_mmx
1278  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
1279  %5 = bitcast x86_mmx %4 to <4 x i16>
1280  %6 = bitcast <4 x i16> %5 to <1 x i64>
1281  %7 = extractelement <1 x i64> %6, i32 0
1282  ret i64 %7
1283}
1284
1285declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
1286
1287define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1288; ALL-LABEL: @test10
1289; ALL: psignb
1290entry:
1291  %0 = bitcast <1 x i64> %b to <8 x i8>
1292  %1 = bitcast <1 x i64> %a to <8 x i8>
1293  %2 = bitcast <8 x i8> %1 to x86_mmx
1294  %3 = bitcast <8 x i8> %0 to x86_mmx
1295  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
1296  %5 = bitcast x86_mmx %4 to <8 x i8>
1297  %6 = bitcast <8 x i8> %5 to <1 x i64>
1298  %7 = extractelement <1 x i64> %6, i32 0
1299  ret i64 %7
1300}
1301
1302declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
1303
1304define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1305; ALL-LABEL: @test9
1306; ALL: pshufb
1307entry:
1308  %0 = bitcast <1 x i64> %b to <8 x i8>
1309  %1 = bitcast <1 x i64> %a to <8 x i8>
1310  %2 = bitcast <8 x i8> %1 to x86_mmx
1311  %3 = bitcast <8 x i8> %0 to x86_mmx
1312  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
1313  %5 = bitcast x86_mmx %4 to <8 x i8>
1314  %6 = bitcast <8 x i8> %5 to <1 x i64>
1315  %7 = extractelement <1 x i64> %6, i32 0
1316  ret i64 %7
1317}
1318
1319declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
1320
1321define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1322; ALL-LABEL: @test8
1323; ALL: pmulhrsw
1324entry:
1325  %0 = bitcast <1 x i64> %b to <4 x i16>
1326  %1 = bitcast <1 x i64> %a to <4 x i16>
1327  %2 = bitcast <4 x i16> %1 to x86_mmx
1328  %3 = bitcast <4 x i16> %0 to x86_mmx
1329  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1330  %5 = bitcast x86_mmx %4 to <4 x i16>
1331  %6 = bitcast <4 x i16> %5 to <1 x i64>
1332  %7 = extractelement <1 x i64> %6, i32 0
1333  ret i64 %7
1334}
1335
1336declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
1337
1338define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1339; ALL-LABEL: @test7
1340; ALL: pmaddubsw
1341entry:
1342  %0 = bitcast <1 x i64> %b to <8 x i8>
1343  %1 = bitcast <1 x i64> %a to <8 x i8>
1344  %2 = bitcast <8 x i8> %1 to x86_mmx
1345  %3 = bitcast <8 x i8> %0 to x86_mmx
1346  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1347  %5 = bitcast x86_mmx %4 to <8 x i8>
1348  %6 = bitcast <8 x i8> %5 to <1 x i64>
1349  %7 = extractelement <1 x i64> %6, i32 0
1350  ret i64 %7
1351}
1352
1353declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
1354
1355define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1356; ALL-LABEL: @test6
1357; ALL: phsubsw
1358entry:
1359  %0 = bitcast <1 x i64> %b to <4 x i16>
1360  %1 = bitcast <1 x i64> %a to <4 x i16>
1361  %2 = bitcast <4 x i16> %1 to x86_mmx
1362  %3 = bitcast <4 x i16> %0 to x86_mmx
1363  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1364  %5 = bitcast x86_mmx %4 to <4 x i16>
1365  %6 = bitcast <4 x i16> %5 to <1 x i64>
1366  %7 = extractelement <1 x i64> %6, i32 0
1367  ret i64 %7
1368}
1369
1370declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
1371
1372define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1373; ALL-LABEL: @test5
1374; ALL: phsubd
1375entry:
1376  %0 = bitcast <1 x i64> %b to <2 x i32>
1377  %1 = bitcast <1 x i64> %a to <2 x i32>
1378  %2 = bitcast <2 x i32> %1 to x86_mmx
1379  %3 = bitcast <2 x i32> %0 to x86_mmx
1380  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
1381  %5 = bitcast x86_mmx %4 to <2 x i32>
1382  %6 = bitcast <2 x i32> %5 to <1 x i64>
1383  %7 = extractelement <1 x i64> %6, i32 0
1384  ret i64 %7
1385}
1386
1387declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
1388
1389define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1390; ALL-LABEL: @test4
1391; ALL: phsubw
1392entry:
1393  %0 = bitcast <1 x i64> %b to <4 x i16>
1394  %1 = bitcast <1 x i64> %a to <4 x i16>
1395  %2 = bitcast <4 x i16> %1 to x86_mmx
1396  %3 = bitcast <4 x i16> %0 to x86_mmx
1397  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
1398  %5 = bitcast x86_mmx %4 to <4 x i16>
1399  %6 = bitcast <4 x i16> %5 to <1 x i64>
1400  %7 = extractelement <1 x i64> %6, i32 0
1401  ret i64 %7
1402}
1403
1404declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
1405
1406define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1407; ALL-LABEL: @test3
1408; ALL: phaddsw
1409entry:
1410  %0 = bitcast <1 x i64> %b to <4 x i16>
1411  %1 = bitcast <1 x i64> %a to <4 x i16>
1412  %2 = bitcast <4 x i16> %1 to x86_mmx
1413  %3 = bitcast <4 x i16> %0 to x86_mmx
1414  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
1415  %5 = bitcast x86_mmx %4 to <4 x i16>
1416  %6 = bitcast <4 x i16> %5 to <1 x i64>
1417  %7 = extractelement <1 x i64> %6, i32 0
1418  ret i64 %7
1419}
1420
1421declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
1422
1423define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1424; ALL-LABEL: @test2
1425; ALL: phaddd
1426entry:
1427  %0 = bitcast <1 x i64> %b to <2 x i32>
1428  %1 = bitcast <1 x i64> %a to <2 x i32>
1429  %2 = bitcast <2 x i32> %1 to x86_mmx
1430  %3 = bitcast <2 x i32> %0 to x86_mmx
1431  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
1432  %5 = bitcast x86_mmx %4 to <2 x i32>
1433  %6 = bitcast <2 x i32> %5 to <1 x i64>
1434  %7 = extractelement <1 x i64> %6, i32 0
1435  ret i64 %7
1436}
1437
1438define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
1439; ALL-LABEL: @test89
1440; ALL: cvtpi2ps
1441  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
1442  ret <4 x float> %c
1443}
1444
1445declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
1446
1447; ALL-LABEL: test90
1448define void @test90() {
1449; ALL-LABEL: @test90
1450; ALL: emms
1451  call void @llvm.x86.mmx.emms()
1452  ret void
1453}
1454
1455declare void @llvm.x86.mmx.emms()
1456