1; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck -check-prefix=SSE41 %s
2; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck -check-prefix=AVX1 %s
3; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck -check-prefix=AVX2 %s
4
5; PR14887
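; Each test places an extra store between the sign extension and the store of
; its result, injecting a store into the chain to exercise the in-register
; (inreg) versions of pmovsx.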
7
; Sign-extends a loaded <2 x i8> to <2 x i64> and stores it to %out, with a
; zeroinitializer store placed between the sext and the store of its result.
; The SSE41, AVX1 and AVX2 runs each expect a (v)pmovsxbq.
define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
; SSE41-LABEL: test1:
; SSE41: pmovsxbq
;
; AVX1-LABEL: test1:
; AVX1: vpmovsxbq
;
; AVX2-LABEL: test1:
; AVX2: vpmovsxbq
  %narrow = load <2 x i8>, <2 x i8>* %in, align 1
  %widened = sext <2 x i8> %narrow to <2 x i64>
  ; Extra store, kept between the extension and the store that uses it.
  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
  store <2 x i64> %widened, <2 x i64>* %out, align 8
  ret void
}
24
; Sign-extends a loaded <4 x i8> to <4 x i64> (a 256-bit result) and stores it
; to %out, with a zeroinitializer store placed between the sext and the store
; of its result. Only the AVX2 run has checks here; it expects vpmovsxbq.
define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
; AVX2-LABEL: test2:
; AVX2: vpmovsxbq
  %narrow = load <4 x i8>, <4 x i8>* %in, align 1
  %widened = sext <4 x i8> %narrow to <4 x i64>
  ; Extra store, kept between the extension and the store that uses it.
  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
  store <4 x i64> %widened, <4 x i64>* %out, align 8
  ret void
}
35
; Sign-extends a loaded <4 x i8> to <4 x i32> and stores it to %out, with a
; zeroinitializer store placed between the sext and the store of its result.
; The SSE41, AVX1 and AVX2 runs each expect a (v)pmovsxbd.
define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
; SSE41-LABEL: test3:
; SSE41: pmovsxbd
;
; AVX1-LABEL: test3:
; AVX1: vpmovsxbd
;
; AVX2-LABEL: test3:
; AVX2: vpmovsxbd
  %narrow = load <4 x i8>, <4 x i8>* %in, align 1
  %widened = sext <4 x i8> %narrow to <4 x i32>
  ; Extra store, kept between the extension and the store that uses it.
  store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
  store <4 x i32> %widened, <4 x i32>* %out, align 8
  ret void
}
52
; Sign-extends a loaded <8 x i8> to <8 x i32> (a 256-bit result) and stores it
; to %out, with a zeroinitializer store placed between the sext and the store
; of its result. Only the AVX2 run has checks here; it expects vpmovsxbd.
define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
; AVX2-LABEL: test4:
; AVX2: vpmovsxbd
  %narrow = load <8 x i8>, <8 x i8>* %in, align 1
  %widened = sext <8 x i8> %narrow to <8 x i32>
  ; Extra store, kept between the extension and the store that uses it.
  store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
  store <8 x i32> %widened, <8 x i32>* %out, align 8
  ret void
}
63
; Sign-extends a loaded <8 x i8> to <8 x i16> and stores it to %out, with a
; zeroinitializer store placed between the sext and the store of its result.
; The SSE41, AVX1 and AVX2 runs each expect a (v)pmovsxbw.
define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
; SSE41-LABEL: test5:
; SSE41: pmovsxbw
;
; AVX1-LABEL: test5:
; AVX1: vpmovsxbw
;
; AVX2-LABEL: test5:
; AVX2: vpmovsxbw
  %narrow = load <8 x i8>, <8 x i8>* %in, align 1
  %widened = sext <8 x i8> %narrow to <8 x i16>
  ; Extra store, kept between the extension and the store that uses it.
  store <8 x i16> zeroinitializer, <8 x i16>* undef, align 8
  store <8 x i16> %widened, <8 x i16>* %out, align 8
  ret void
}
80
; Sign-extends a loaded <16 x i8> to <16 x i16> (a 256-bit result) and stores
; it to %out, with a zeroinitializer store placed between the sext and the
; store of its result. Only the AVX2 run has checks here; it expects vpmovsxbw.
define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
; AVX2-LABEL: test6:
; AVX2: vpmovsxbw
  %narrow = load <16 x i8>, <16 x i8>* %in, align 1
  %widened = sext <16 x i8> %narrow to <16 x i16>
  ; Extra store, kept between the extension and the store that uses it.
  store <16 x i16> zeroinitializer, <16 x i16>* undef, align 8
  store <16 x i16> %widened, <16 x i16>* %out, align 8
  ret void
}
91
; Sign-extends a loaded <2 x i16> to <2 x i64> and stores it to %out, with a
; zeroinitializer store placed between the sext and the store of its result.
; The SSE41, AVX1 and AVX2 runs each expect a (v)pmovsxwq.
define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
; SSE41-LABEL: test7:
; SSE41: pmovsxwq
;
; AVX1-LABEL: test7:
; AVX1: vpmovsxwq
;
; AVX2-LABEL: test7:
; AVX2: vpmovsxwq
  %narrow = load <2 x i16>, <2 x i16>* %in, align 1
  %widened = sext <2 x i16> %narrow to <2 x i64>
  ; Extra store, kept between the extension and the store that uses it.
  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
  store <2 x i64> %widened, <2 x i64>* %out, align 8
  ret void
}
109
; Sign-extends a loaded <4 x i16> to <4 x i64> (a 256-bit result) and stores it
; to %out, with a zeroinitializer store placed between the sext and the store
; of its result. Only the AVX2 run has checks here; it expects vpmovsxwq.
define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
; AVX2-LABEL: test8:
; AVX2: vpmovsxwq
  %narrow = load <4 x i16>, <4 x i16>* %in, align 1
  %widened = sext <4 x i16> %narrow to <4 x i64>
  ; Extra store, kept between the extension and the store that uses it.
  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
  store <4 x i64> %widened, <4 x i64>* %out, align 8
  ret void
}
120
; Sign-extends a loaded <4 x i16> to <4 x i32> and stores it to %out, with a
; zeroinitializer store placed between the sext and the store of its result.
; The SSE41, AVX1 and AVX2 runs each expect a (v)pmovsxwd.
define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
; SSE41-LABEL: test9:
; SSE41: pmovsxwd
;
; AVX1-LABEL: test9:
; AVX1: vpmovsxwd
;
; AVX2-LABEL: test9:
; AVX2: vpmovsxwd
  %narrow = load <4 x i16>, <4 x i16>* %in, align 1
  %widened = sext <4 x i16> %narrow to <4 x i32>
  ; Extra store, kept between the extension and the store that uses it.
  store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
  store <4 x i32> %widened, <4 x i32>* %out, align 8
  ret void
}
137
; Sign-extends a loaded <8 x i16> to <8 x i32> (a 256-bit result) and stores it
; to %out, with a zeroinitializer store placed between the sext and the store
; of its result. Only the AVX2 run has checks here; it expects vpmovsxwd.
define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
; AVX2-LABEL: test10:
; AVX2: vpmovsxwd
  %narrow = load <8 x i16>, <8 x i16>* %in, align 1
  %widened = sext <8 x i16> %narrow to <8 x i32>
  ; Extra store, kept between the extension and the store that uses it.
  store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
  store <8 x i32> %widened, <8 x i32>* %out, align 8
  ret void
}
148
; Sign-extends a loaded <2 x i32> to <2 x i64> and stores it to %out, with a
; zeroinitializer store placed between the sext and the store of its result.
; The SSE41, AVX1 and AVX2 runs each expect a (v)pmovsxdq.
define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
; SSE41-LABEL: test11:
; SSE41: pmovsxdq
;
; AVX1-LABEL: test11:
; AVX1: vpmovsxdq
;
; AVX2-LABEL: test11:
; AVX2: vpmovsxdq
  %narrow = load <2 x i32>, <2 x i32>* %in, align 1
  %widened = sext <2 x i32> %narrow to <2 x i64>
  ; Extra store, kept between the extension and the store that uses it.
  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
  store <2 x i64> %widened, <2 x i64>* %out, align 8
  ret void
}
165
; Sign-extends a loaded <4 x i32> to <4 x i64> (a 256-bit result) and stores it
; to %out, with a zeroinitializer store placed between the sext and the store
; of its result. Only the AVX2 run has checks here; it expects vpmovsxdq.
define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
; AVX2-LABEL: test12:
; AVX2: vpmovsxdq
  %narrow = load <4 x i32>, <4 x i32>* %in, align 1
  %widened = sext <4 x i32> %narrow to <4 x i64>
  ; Extra store, kept between the extension and the store that uses it.
  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
  store <4 x i64> %widened, <4 x i64>* %out, align 8
  ret void
}
176