; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding | FileCheck %s
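; This file exercises 128-bit and 256-bit vector load/store selection under
; AVX512VL: aligned accesses should select the vmova* forms and unaligned
; (align 1) accesses the vmovu* forms. Unmasked operations compress from EVEX
; to the shorter VEX encoding, while the masked tests below keep their EVEX
; encodings (opmask registers have no VEX form).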

define <8 x i32> @test_256_1(i8* %addr) {
; CHECK-LABEL: test_256_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
  ret <8 x i32> %res
}

define <8 x i32> @test_256_2(i8* %addr) {
; CHECK-LABEL: test_256_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
  ret <8 x i32> %res
}

define void @test_256_3(i8* %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64> %data, <4 x i64>* %vaddr, align 32
  ret void
}

define void @test_256_4(i8* %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32> %data, <8 x i32>* %vaddr, align 1
  ret void
}

define void @test_256_5(i8* %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32> %data, <8 x i32>* %vaddr, align 32
  ret void
}

define <4 x i64> @test_256_6(i8* %addr) {
; CHECK-LABEL: test_256_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
  ret <4 x i64> %res
}

define void @test_256_7(i8* %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64> %data, <4 x i64>* %vaddr, align 1
  ret void
}

define <4 x i64> @test_256_8(i8* %addr) {
; CHECK-LABEL: test_256_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
  ret <4 x i64> %res
}

define void @test_256_9(i8* %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double> %data, <4 x double>* %vaddr, align 32
  ret void
}

define <4 x double> @test_256_10(i8* %addr) {
; CHECK-LABEL: test_256_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 32
  ret <4 x double> %res
}

define void @test_256_11(i8* %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float> %data, <8 x float>* %vaddr, align 32
  ret void
}

define <8 x float> @test_256_12(i8* %addr) {
; CHECK-LABEL: test_256_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 32
  ret <8 x float> %res
}

define void @test_256_13(i8* %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double> %data, <4 x double>* %vaddr, align 1
  ret void
}

define <4 x double> @test_256_14(i8* %addr) {
; CHECK-LABEL: test_256_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 1
  ret <4 x double> %res
}

define void @test_256_15(i8* %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float> %data, <8 x float>* %vaddr, align 1
  ret void
}

define <8 x float> @test_256_16(i8* %addr) {
; CHECK-LABEL: test_256_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 1
  ret <8 x float> %res
}

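; Masked-load tests: the mask is built with icmp ne against zero (selected as
; vptestmd/vptestmq into %k1), and the compare-and-select pattern is expected
; to fold into a single masked vmovdqa32/vmovdqu32 (or the 64-bit element
; variants), merging into %old with {%k1} or zeroing with {%k1} {z}.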
define <8 x i32> @test_256_17(i8* %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32> %res
}

define <8 x i32> @test_256_18(i8* %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32> %res
}

define <8 x i32> @test_256_19(i8* %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

define <8 x i32> @test_256_20(i8* %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

define <4 x i64> @test_256_21(i8* %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64> %res
}

define <4 x i64> @test_256_22(i8* %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64> %res
}

define <4 x i64> @test_256_23(i8* %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

define <4 x i64> @test_256_24(i8* %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

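; For the <8 x float> variants the mask comes from fcmp one against zero,
; which selects as vcmpneq_oqps against a zeroed register.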
define <8 x float> @test_256_25(i8* %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float> %res
}

define <8 x float> @test_256_26(i8* %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float> %res
}

define <8 x float> @test_256_27(i8* %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_256_28(i8* %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <4 x double> @test_256_29(i8* %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double> %res
}

define <4 x double> @test_256_30(i8* %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double> %res
}

define <4 x double> @test_256_31(i8* %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_256_32(i8* %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double> %res
}

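; 128-bit (xmm) versions of the unmasked aligned/unaligned load/store tests.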
define <4 x i32> @test_128_1(i8* %addr) {
; CHECK-LABEL: test_128_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
  ret <4 x i32> %res
}

define <4 x i32> @test_128_2(i8* %addr) {
; CHECK-LABEL: test_128_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
  ret <4 x i32> %res
}

define void @test_128_3(i8* %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64> %data, <2 x i64>* %vaddr, align 16
  ret void
}

define void @test_128_4(i8* %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %data, <4 x i32>* %vaddr, align 1
  ret void
}

define void @test_128_5(i8* %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %data, <4 x i32>* %vaddr, align 16
  ret void
}

define <2 x i64> @test_128_6(i8* %addr) {
; CHECK-LABEL: test_128_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
  ret <2 x i64> %res
}

define void @test_128_7(i8* %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64> %data, <2 x i64>* %vaddr, align 1
  ret void
}

define <2 x i64> @test_128_8(i8* %addr) {
; CHECK-LABEL: test_128_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
  ret <2 x i64> %res
}

define void @test_128_9(i8* %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double> %data, <2 x double>* %vaddr, align 16
  ret void
}

define <2 x double> @test_128_10(i8* %addr) {
; CHECK-LABEL: test_128_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 16
  ret <2 x double> %res
}

define void @test_128_11(i8* %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float> %data, <4 x float>* %vaddr, align 16
  ret void
}

define <4 x float> @test_128_12(i8* %addr) {
; CHECK-LABEL: test_128_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 16
  ret <4 x float> %res
}

define void @test_128_13(i8* %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double> %data, <2 x double>* %vaddr, align 1
  ret void
}

define <2 x double> @test_128_14(i8* %addr) {
; CHECK-LABEL: test_128_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 1
  ret <2 x double> %res
}

define void @test_128_15(i8* %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float> %data, <4 x float>* %vaddr, align 1
  ret void
}

define <4 x float> @test_128_16(i8* %addr) {
; CHECK-LABEL: test_128_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 1
  ret <4 x float> %res
}

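; 128-bit versions of the masked-load tests. Unlike the 256-bit float tests
; above, the float/double masks here come from icmp ne on integer vectors, so
; the mask is again built with vptestmd/vptestmq.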
define <4 x i32> @test_128_17(i8* %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32> %res
}

define <4 x i32> @test_128_18(i8* %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32> %res
}

define <4 x i32> @test_128_19(i8* %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

define <4 x i32> @test_128_20(i8* %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

define <2 x i64> @test_128_21(i8* %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64> %res
}

define <2 x i64> @test_128_22(i8* %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64> %res
}

define <2 x i64> @test_128_23(i8* %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

define <2 x i64> @test_128_24(i8* %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

define <4 x float> @test_128_25(i8* %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float> %res
}

define <4 x float> @test_128_26(i8* %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float> %res
}

define <4 x float> @test_128_27(i8* %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_128_28(i8* %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <2 x double> @test_128_29(i8* %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double> %res
}

define <2 x double> @test_128_30(i8* %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double> %res
}

define <2 x double> @test_128_31(i8* %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double> %res
}

define <2 x double> @test_128_32(i8* %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double> %res
}