; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE --check-prefix=ALL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=ALL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.

; Aligned (align 16) load of a <16 x i8> vector through %V.
; Expected selection: movdqa for the sse2 run, vmovdqa for the avx run.
define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

; Aligned (align 16) load of a <8 x i16> vector through %V.
; Expected selection: movdqa for the sse2 run, vmovdqa for the avx run.
define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

; Aligned (align 16) load of a <4 x i32> vector through %V.
; Expected selection: movdqa for the sse2 run, vmovdqa for the avx run.
define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

; Aligned (align 16) load of a <2 x i64> vector through %V.
; Expected selection: movdqa for the sse2 run, vmovdqa for the avx run.
define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

; Under-aligned (align 4) load of a <16 x i8> vector through %V.
; Expected selection is the unaligned form: movdqu for the sse2 run,
; vmovdqu for the avx run.
define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_unaligned:
; SSE: movdqu  (%rdi), %xmm0
; AVX: vmovdqu  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

; Under-aligned (align 4) load of a <8 x i16> vector through %V.
; Expected selection is the unaligned form: movdqu for the sse2 run,
; vmovdqu for the avx run.
define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_unaligned:
; SSE: movdqu  (%rdi), %xmm0
; AVX: vmovdqu  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

; Under-aligned (align 4) load of a <4 x i32> vector through %V.
; Expected selection is the unaligned form: movdqu for the sse2 run,
; vmovdqu for the avx run.
define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_unaligned:
; SSE: movdqu  (%rdi), %xmm0
; AVX: vmovdqu  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

; Under-aligned (align 4) load of a <2 x i64> vector through %V.
; Expected selection is the unaligned form: movdqu for the sse2 run,
; vmovdqu for the avx run.
define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_unaligned:
; SSE: movdqu  (%rdi), %xmm0
; AVX: vmovdqu  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

; Aligned (align 16) load of a <4 x float> vector through %V.
; Expected selection is the single-precision move: movaps for the sse2 run,
; vmovaps for the avx run.
define <4 x float> @test_v4f32(<4 x float>* %V) {
; ALL-LABEL: test_v4f32:
; SSE: movaps  (%rdi), %xmm0
; AVX: vmovaps  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

; Aligned (align 16) load of a <2 x double> vector through %V.
; Expected selection is the double-precision move: movapd for the sse2 run,
; vmovapd for the avx run.
define <2 x double> @test_v2f64(<2 x double>* %V) {
; ALL-LABEL: test_v2f64:
; SSE: movapd  (%rdi), %xmm0
; AVX: vmovapd  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

; Under-aligned (align 4) load of a <4 x float> vector through %V.
; Expected selection is the unaligned single-precision move: movups for the
; sse2 run, vmovups for the avx run.
define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_unaligned:
; SSE: movups  (%rdi), %xmm0
; AVX: vmovups  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

; Under-aligned (align 4) load of a <2 x double> vector through %V.
; Expected selection is the unaligned double-precision move: movupd for the
; sse2 run, vmovupd for the avx run.
define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_unaligned:
; SSE: movupd  (%rdi), %xmm0
; AVX: vmovupd  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

; Load of a <16 x i8> vector with no explicit `align`, so the load uses the
; type's ABI alignment. The checks expect the aligned form: movdqa for the
; sse2 run, vmovdqa for the avx run.
define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_abi_alignment:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

; Load of a <8 x i16> vector with no explicit `align`, so the load uses the
; type's ABI alignment. The checks expect the aligned form: movdqa for the
; sse2 run, vmovdqa for the avx run.
define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_abi_alignment:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

; Load of a <4 x i32> vector with no explicit `align`, so the load uses the
; type's ABI alignment. The checks expect the aligned form: movdqa for the
; sse2 run, vmovdqa for the avx run.
define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_abi_alignment:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

; Load of a <2 x i64> vector with no explicit `align`, so the load uses the
; type's ABI alignment. The checks expect the aligned form: movdqa for the
; sse2 run, vmovdqa for the avx run.
define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_abi_alignment:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

; Load of a <4 x float> vector with no explicit `align`, so the load uses the
; type's ABI alignment. The checks expect the aligned single-precision move:
; movaps for the sse2 run, vmovaps for the avx run.
define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_abi_alignment:
; SSE: movaps  (%rdi), %xmm0
; AVX: vmovaps  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

; Load of a <2 x double> vector with no explicit `align`, so the load uses the
; type's ABI alignment. The checks expect the aligned double-precision move:
; movapd for the sse2 run, vmovapd for the avx run.
define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_abi_alignment:
; SSE: movapd  (%rdi), %xmm0
; AVX: vmovapd  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}

; Aligned (align 64) load of a 512-bit <8 x i64> vector through %V.
; Only the avx512f run carries checks for this function; it expects the
; aligned form vmovdqa64 into %zmm0 followed directly by retq.
define <8 x i64> @test_v8i64_alignment(<8 x i64>* %V) {
; KNL-LABEL: test_v8i64_alignment:
; KNL:       # BB#0: # %entry
; KNL-NEXT:    vmovdqa64 (%rdi), %zmm0
; KNL-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 64
  ret <8 x i64> %0
}

; Under-aligned (align 4) load of a 512-bit <8 x i64> vector through %V.
; Only the avx512f run carries checks for this function; it expects the
; unaligned form vmovdqu64 into %zmm0 followed directly by retq.
define <8 x i64> @test_v8i64(<8 x i64>* %V) {
; KNL-LABEL: test_v8i64:
; KNL:       # BB#0: # %entry
; KNL-NEXT:    vmovdqu64 (%rdi), %zmm0
; KNL-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 4
  ret <8 x i64> %0
}
