; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding | FileCheck %s

; test_256_1 - unaligned (align 1) load of <32 x i8> through a bitcast i8*.
; The FileCheck pattern below expects a vmovdqu8 whose encoding starts with
; byte 0x62 (the EVEX prefix byte).
; CHECK-LABEL: test_256_1
; CHECK: vmovdqu8 {{.*}} ## encoding: [0x62
; CHECK: ret
define <32 x i8> @test_256_1(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <32 x i8>*
  %res = load <32 x i8>, <32 x i8>* %vaddr, align 1
  ret <32 x i8>%res
}

; test_256_2 - unaligned (align 1) store of <32 x i8> through a bitcast i8*.
; The FileCheck pattern below expects a vmovdqu8 whose encoding starts with
; byte 0x62 (the EVEX prefix byte).
; CHECK-LABEL: test_256_2
; CHECK: vmovdqu8{{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_2(i8 * %addr, <32 x i8> %data) {
  %vaddr = bitcast i8* %addr to <32 x i8>*
  store <32 x i8>%data, <32 x i8>* %vaddr, align 1
  ret void
}

; test_256_3 - unaligned <32 x i8> load combined with a select that keeps
; %old in the lanes where (%mask1 != 0) is false.
; The FileCheck pattern below expects a vmovdqu8 carrying a write-mask
; operand in the range {%k1}..{%k7}, with an encoding starting at byte 0x62.
; CHECK-LABEL: test_256_3
; CHECK: vmovdqu8{{.*{%k[1-7]} }}## encoding: [0x62
; CHECK: ret
define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) {
  ; Per-lane mask: true where the corresponding byte of %mask1 is non-zero.
  %mask = icmp ne <32 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i8>*
  %r = load <32 x i8>, <32 x i8>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> %old
  ret <32 x i8>%res
}

; test_256_4 - unaligned <32 x i8> load combined with a select that yields
; zero in the lanes where (%mask1 != 0) is false.
; The FileCheck pattern below expects a vmovdqu8 carrying a write-mask
; operand {%k1}..{%k7} followed by the {z} marker, with an encoding starting
; at byte 0x62.
; CHECK-LABEL: test_256_4
; CHECK: vmovdqu8{{.*{%k[1-7]} {z} }}## encoding: [0x62
; CHECK: ret
define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
  ; Per-lane mask: true where the corresponding byte of %mask1 is non-zero.
  %mask = icmp ne <32 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i8>*
  %r = load <32 x i8>, <32 x i8>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> zeroinitializer
  ret <32 x i8>%res
}

; test_256_5 - unaligned (align 1) load of <16 x i16> through a bitcast i8*.
; The FileCheck pattern below expects a vmovdqu16 whose encoding starts with
; byte 0x62 (the EVEX prefix byte).
; CHECK-LABEL: test_256_5
; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
; CHECK: ret
define <16 x i16> @test_256_5(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <16 x i16>*
  %res = load <16 x i16>, <16 x i16>* %vaddr, align 1
  ret <16 x i16>%res
}

; test_256_6 - unaligned (align 1) store of <16 x i16> through a bitcast i8*.
; The FileCheck pattern below expects a vmovdqu16 whose encoding starts with
; byte 0x62 (the EVEX prefix byte).
; CHECK-LABEL: test_256_6
; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_6(i8 * %addr, <16 x i16> %data) {
  %vaddr = bitcast i8* %addr to <16 x i16>*
  store <16 x i16>%data, <16 x i16>* %vaddr, align 1
  ret void
}

; test_256_7 - unaligned <16 x i16> load combined with a select that keeps
; %old in the lanes where (%mask1 != 0) is false.
; The FileCheck pattern below expects a vmovdqu16 carrying a write-mask
; operand in the range {%k1}..{%k7}, with an encoding starting at byte 0x62.
; CHECK-LABEL: test_256_7
; CHECK: vmovdqu16{{.*{%k[1-7]} }}## encoding: [0x62
; CHECK: ret
define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) {
  ; Per-lane mask: true where the corresponding word of %mask1 is non-zero.
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i16>*
  %r = load <16 x i16>, <16 x i16>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> %old
  ret <16 x i16>%res
}

; test_256_8 - unaligned <16 x i16> load combined with a select that yields
; zero in the lanes where (%mask1 != 0) is false.
; The FileCheck pattern below expects a vmovdqu16 carrying a write-mask
; operand {%k1}..{%k7} followed by the {z} marker, with an encoding starting
; at byte 0x62.
; CHECK-LABEL: test_256_8
; CHECK: vmovdqu16{{.*{%k[1-7]} {z} }}## encoding: [0x62
; CHECK: ret
define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
  ; Per-lane mask: true where the corresponding word of %mask1 is non-zero.
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i16>*
  %r = load <16 x i16>, <16 x i16>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> zeroinitializer
  ret <16 x i16>%res
}

; test_128_1 - unaligned (align 1) load of <16 x i8> through a bitcast i8*.
; The FileCheck pattern below expects a vmovdqu8 whose encoding starts with
; byte 0x62 (the EVEX prefix byte).
; CHECK-LABEL: test_128_1
; CHECK: vmovdqu8 {{.*}} ## encoding: [0x62
; CHECK: ret
define <16 x i8> @test_128_1(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <16 x i8>*
  %res = load <16 x i8>, <16 x i8>* %vaddr, align 1
  ret <16 x i8>%res
}

; test_128_2 - unaligned (align 1) store of <16 x i8> through a bitcast i8*.
; The FileCheck pattern below expects a vmovdqu8 whose encoding starts with
; byte 0x62 (the EVEX prefix byte).
; CHECK-LABEL: test_128_2
; CHECK: vmovdqu8{{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_2(i8 * %addr, <16 x i8> %data) {
  %vaddr = bitcast i8* %addr to <16 x i8>*
  store <16 x i8>%data, <16 x i8>* %vaddr, align 1
  ret void
}

; test_128_3 - unaligned <16 x i8> load combined with a select that keeps
; %old in the lanes where (%mask1 != 0) is false.
; The FileCheck pattern below expects a vmovdqu8 carrying a write-mask
; operand in the range {%k1}..{%k7}, with an encoding starting at byte 0x62.
; CHECK-LABEL: test_128_3
; CHECK: vmovdqu8{{.*{%k[1-7]} }}## encoding: [0x62
; CHECK: ret
define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) {
  ; Per-lane mask: true where the corresponding byte of %mask1 is non-zero.
  %mask = icmp ne <16 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i8>*
  %r = load <16 x i8>, <16 x i8>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> %old
  ret <16 x i8>%res
}

; test_128_4 - unaligned <16 x i8> load combined with a select that yields
; zero in the lanes where (%mask1 != 0) is false.
; The FileCheck pattern below expects a vmovdqu8 carrying a write-mask
; operand {%k1}..{%k7} followed by the {z} marker, with an encoding starting
; at byte 0x62.
; CHECK-LABEL: test_128_4
; CHECK: vmovdqu8{{.*{%k[1-7]} {z} }}## encoding: [0x62
; CHECK: ret
define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
  ; Per-lane mask: true where the corresponding byte of %mask1 is non-zero.
  %mask = icmp ne <16 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i8>*
  %r = load <16 x i8>, <16 x i8>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> zeroinitializer
  ret <16 x i8>%res
}

; test_128_5 - unaligned (align 1) load of <8 x i16> through a bitcast i8*.
; The FileCheck pattern below expects a vmovdqu16 whose encoding starts with
; byte 0x62 (the EVEX prefix byte).
; CHECK-LABEL: test_128_5
; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
; CHECK: ret
define <8 x i16> @test_128_5(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <8 x i16>*
  %res = load <8 x i16>, <8 x i16>* %vaddr, align 1
  ret <8 x i16>%res
}

; test_128_6 - unaligned (align 1) store of <8 x i16> through a bitcast i8*.
; The FileCheck pattern below expects a vmovdqu16 whose encoding starts with
; byte 0x62 (the EVEX prefix byte).
; CHECK-LABEL: test_128_6
; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_6(i8 * %addr, <8 x i16> %data) {
  %vaddr = bitcast i8* %addr to <8 x i16>*
  store <8 x i16>%data, <8 x i16>* %vaddr, align 1
  ret void
}

; test_128_7 - unaligned <8 x i16> load combined with a select that keeps
; %old in the lanes where (%mask1 != 0) is false.
; The FileCheck pattern below expects a vmovdqu16 carrying a write-mask
; operand in the range {%k1}..{%k7}, with an encoding starting at byte 0x62.
; CHECK-LABEL: test_128_7
; CHECK: vmovdqu16{{.*{%k[1-7]} }}## encoding: [0x62
; CHECK: ret
define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) {
  ; Per-lane mask: true where the corresponding word of %mask1 is non-zero.
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i16>*
  %r = load <8 x i16>, <8 x i16>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> %old
  ret <8 x i16>%res
}

; test_128_8 - unaligned <8 x i16> load combined with a select that yields
; zero in the lanes where (%mask1 != 0) is false.
; The FileCheck pattern below expects a vmovdqu16 carrying a write-mask
; operand {%k1}..{%k7} followed by the {z} marker, with an encoding starting
; at byte 0x62.
; CHECK-LABEL: test_128_8
; CHECK: vmovdqu16{{.*{%k[1-7]} {z} }}## encoding: [0x62
; CHECK: ret
define <8 x i16> @test_128_8(i8 * %addr, <8 x i16> %mask1) {
  ; Per-lane mask: true where the corresponding word of %mask1 is non-zero.
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i16>*
  %r = load <8 x i16>, <8 x i16>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> zeroinitializer
  ret <8 x i16>%res
}
