; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE --check-prefix=ALL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=ALL

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_unaligned:
; SSE: movdqu  (%rdi), %xmm0
; AVX: vmovdqu  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_unaligned:
; SSE: movdqu  (%rdi), %xmm0
; AVX: vmovdqu  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_unaligned:
; SSE: movdqu  (%rdi), %xmm0
; AVX: vmovdqu  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_unaligned:
; SSE: movdqu  (%rdi), %xmm0
; AVX: vmovdqu  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(<4 x float>* %V) {
; ALL-LABEL: test_v4f32:
; SSE: movaps  (%rdi), %xmm0
; AVX: vmovaps  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; ALL-LABEL: test_v2f64:
; SSE: movapd  (%rdi), %xmm0
; AVX: vmovapd  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_unaligned:
; SSE: movups  (%rdi), %xmm0
; AVX: vmovups  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_unaligned:
; SSE: movupd  (%rdi), %xmm0
; AVX: vmovupd  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_abi_alignment:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_abi_alignment:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_abi_alignment:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_abi_alignment:
; SSE: movdqa  (%rdi), %xmm0
; AVX: vmovdqa  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_abi_alignment:
; SSE: movaps  (%rdi), %xmm0
; AVX: vmovaps  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_abi_alignment:
; SSE: movapd  (%rdi), %xmm0
; AVX: vmovapd  (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}