; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding | FileCheck %s
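
; This file checks AVX-512VL lowering of full-vector loads and stores: aligned
; accesses should select vmovdqa32/64 and vmovaps/vmovapd, while align-1
; accesses should select the unaligned vmovdqu32/64 and vmovups/vmovupd forms.
; Masked variants are covered further down.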
define <8 x i32> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
  ret <8 x i32> %res
}

define <8 x i32> @test_256_2(i8 * %addr) {
; CHECK-LABEL: test_256_2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
  ret <8 x i32> %res
}

define void @test_256_3(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64> %data, <4 x i64>* %vaddr, align 32
  ret void
}

define void @test_256_4(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32> %data, <8 x i32>* %vaddr, align 1
  ret void
}

define void @test_256_5(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32> %data, <8 x i32>* %vaddr, align 32
  ret void
}

define <4 x i64> @test_256_6(i8 * %addr) {
; CHECK-LABEL: test_256_6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
  ret <4 x i64> %res
}

define void @test_256_7(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64> %data, <4 x i64>* %vaddr, align 1
  ret void
}

define <4 x i64> @test_256_8(i8 * %addr) {
; CHECK-LABEL: test_256_8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
  ret <4 x i64> %res
}

define void @test_256_9(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double> %data, <4 x double>* %vaddr, align 32
  ret void
}

define <4 x double> @test_256_10(i8 * %addr) {
; CHECK-LABEL: test_256_10:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 32
  ret <4 x double> %res
}

define void @test_256_11(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float> %data, <8 x float>* %vaddr, align 32
  ret void
}

define <8 x float> @test_256_12(i8 * %addr) {
; CHECK-LABEL: test_256_12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 32
  ret <8 x float> %res
}

define void @test_256_13(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double> %data, <4 x double>* %vaddr, align 1
  ret void
}

define <4 x double> @test_256_14(i8 * %addr) {
; CHECK-LABEL: test_256_14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 1
  ret <4 x double> %res
}

define void @test_256_15(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float> %data, <8 x float>* %vaddr, align 1
  ret void
}

define <8 x float> @test_256_16(i8 * %addr) {
; CHECK-LABEL: test_256_16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 1
  ret <8 x float> %res
}

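; The remaining 256-bit tests cover masked loads: a zero vector is built with
; vpxord, vpcmpneq produces the %k1 predicate, and the load is folded into a
; masked blend (vpblendmd/q, vblendmps/pd) or a zero-masking {z} move.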
define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32> %res
}

define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32> %res
}

define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64> %res
}

define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64> %res
}

define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

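; When the mask comes from fcmp one (ordered and not-equal), lowering emits
; vcmpordps followed by a vcmpneqps masked by the first result to build %k1.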
define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float> %res
}

define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float> %res
}

define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double> %res
}

define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double> %res
}

define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double> %res
}

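; 128-bit (xmm) versions of the same aligned and unaligned load/store tests.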
define <4 x i32> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
  ret <4 x i32> %res
}

define <4 x i32> @test_128_2(i8 * %addr) {
; CHECK-LABEL: test_128_2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
  ret <4 x i32> %res
}

define void @test_128_3(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64> %data, <2 x i64>* %vaddr, align 16
  ret void
}

define void @test_128_4(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %data, <4 x i32>* %vaddr, align 1
  ret void
}

define void @test_128_5(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %data, <4 x i32>* %vaddr, align 16
  ret void
}

define <2 x i64> @test_128_6(i8 * %addr) {
; CHECK-LABEL: test_128_6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
  ret <2 x i64> %res
}

define void @test_128_7(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64> %data, <2 x i64>* %vaddr, align 1
  ret void
}

define <2 x i64> @test_128_8(i8 * %addr) {
; CHECK-LABEL: test_128_8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
  ret <2 x i64> %res
}

define void @test_128_9(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double> %data, <2 x double>* %vaddr, align 16
  ret void
}

define <2 x double> @test_128_10(i8 * %addr) {
; CHECK-LABEL: test_128_10:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 16
  ret <2 x double> %res
}

define void @test_128_11(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float> %data, <4 x float>* %vaddr, align 16
  ret void
}

define <4 x float> @test_128_12(i8 * %addr) {
; CHECK-LABEL: test_128_12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 16
  ret <4 x float> %res
}

define void @test_128_13(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double> %data, <2 x double>* %vaddr, align 1
  ret void
}

define <2 x double> @test_128_14(i8 * %addr) {
; CHECK-LABEL: test_128_14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 1
  ret <2 x double> %res
}

define void @test_128_15(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float> %data, <4 x float>* %vaddr, align 1
  ret void
}

define <4 x float> @test_128_16(i8 * %addr) {
; CHECK-LABEL: test_128_16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 1
  ret <4 x float> %res
}

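; Masked 128-bit loads, mirroring the 256-bit masked tests above.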
define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32> %res
}

define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32> %res
}

define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64> %res
}

define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64> %res
}

define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

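; Unlike the 256-bit float tests, these masked floating-point loads use an
; integer (icmp ne) mask, so no vcmpord/vcmpneq chain is needed.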
define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float> %res
}

define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float> %res
}

define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double> %res
}

define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double> %res
}

define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double> %res
}

define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double> %res
}