1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
4; RUN:   -check-prefix=P9
5; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
6; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
7; RUN:   -check-prefix=P8
; test: load the double at element index 3 of %a, replicate it into both lanes
; of a <2 x double>, and store the vector to %c with 16-byte alignment.
; The P9 checks expect addi 24 + lxvdsx + stxv; the P8 checks expect
; addi 24 + lxvdsx + stxvd2x. The addi immediate 24 matches 3 * 8 bytes.
8define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
9; P9-LABEL: test:
10; P9:       # %bb.0: # %entry
11; P9-NEXT:    addi r4, r4, 24
12; P9-NEXT:    lxvdsx vs0, 0, r4
13; P9-NEXT:    stxv vs0, 0(r3)
14; P9-NEXT:    blr
15;
16; P8-LABEL: test:
17; P8:       # %bb.0: # %entry
18; P8-NEXT:    addi r4, r4, 24
19; P8-NEXT:    lxvdsx vs0, 0, r4
20; P8-NEXT:    stxvd2x vs0, 0, r3
21; P8-NEXT:    blr
22entry:
23  %arrayidx = getelementptr inbounds double, double* %a, i64 3
24  %0 = load double, double* %arrayidx, align 8
  ; Insert the scalar into lane 0, then use an all-zero shuffle mask so every
  ; result lane takes lane 0 (the splat idiom).
25  %splat.splatinsert.i = insertelement <2 x double> undef, double %0, i32 0
26  %splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> undef, <2 x i32> zeroinitializer
27  store <2 x double> %splat.splat.i, <2 x double>* %c, align 16
28  ret void
29}
30
; test2: load the float at element index 3 of %a, replicate it into all four
; lanes of a <4 x float>, and store the vector to %c with 16-byte alignment.
; The P9 checks expect addi 12 + lxvwsx + stxv. The P8 checks expect a scalar
; load (lfiwzx) followed by an explicit splat (xxspltw) and stvx.
31define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
32; P9-LABEL: test2:
33; P9:       # %bb.0: # %entry
34; P9-NEXT:    addi r4, r4, 12
35; P9-NEXT:    lxvwsx vs0, 0, r4
36; P9-NEXT:    stxv vs0, 0(r3)
37; P9-NEXT:    blr
38;
39; P8-LABEL: test2:
40; P8:       # %bb.0: # %entry
41; P8-NEXT:    addi r4, r4, 12
42; P8-NEXT:    lfiwzx f0, 0, r4
43; P8-NEXT:    xxspltw v2, vs0, 1
44; P8-NEXT:    stvx v2, 0, r3
45; P8-NEXT:    blr
46entry:
47  %arrayidx = getelementptr inbounds float, float* %a, i64 3
48  %0 = load float, float* %arrayidx, align 4
  ; Insert the scalar into lane 0, then use an all-zero shuffle mask so every
  ; result lane takes lane 0 (the splat idiom).
49  %splat.splatinsert.i = insertelement <4 x float> undef, float %0, i32 0
50  %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> undef, <4 x i32> zeroinitializer
51  store <4 x float> %splat.splat.i, <4 x float>* %c, align 16
52  ret void
53}
54
; test3: integer counterpart of the <4 x float> splat: load the i32 at element
; index 3 of %a, replicate it into all four lanes of a <4 x i32>, and store the
; vector to %c with 16-byte alignment.
; The P9 checks expect addi 12 + lxvwsx + stxv. The P8 checks expect
; lfiwzx + xxspltw + stvx.
55define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
56; P9-LABEL: test3:
57; P9:       # %bb.0: # %entry
58; P9-NEXT:    addi r4, r4, 12
59; P9-NEXT:    lxvwsx vs0, 0, r4
60; P9-NEXT:    stxv vs0, 0(r3)
61; P9-NEXT:    blr
62;
63; P8-LABEL: test3:
64; P8:       # %bb.0: # %entry
65; P8-NEXT:    addi r4, r4, 12
66; P8-NEXT:    lfiwzx f0, 0, r4
67; P8-NEXT:    xxspltw v2, vs0, 1
68; P8-NEXT:    stvx v2, 0, r3
69; P8-NEXT:    blr
70entry:
71  %arrayidx = getelementptr inbounds i32, i32* %a, i64 3
72  %0 = load i32, i32* %arrayidx, align 4
  ; Insert the scalar into lane 0, then use an all-zero shuffle mask so every
  ; result lane takes lane 0 (the splat idiom).
73  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %0, i32 0
74  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
75  store <4 x i32> %splat.splat.i, <4 x i32>* %c, align 16
76  ret void
77}
78
; test4: integer counterpart of the <2 x double> splat: load the i64 at element
; index 3 of %a, replicate it into both lanes of a <2 x i64>, and store the
; vector to %c with 16-byte alignment.
; The P9 checks expect addi 24 + lxvdsx + stxv; the P8 checks expect
; addi 24 + lxvdsx + stxvd2x.
79define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
80; P9-LABEL: test4:
81; P9:       # %bb.0: # %entry
82; P9-NEXT:    addi r4, r4, 24
83; P9-NEXT:    lxvdsx vs0, 0, r4
84; P9-NEXT:    stxv vs0, 0(r3)
85; P9-NEXT:    blr
86;
87; P8-LABEL: test4:
88; P8:       # %bb.0: # %entry
89; P8-NEXT:    addi r4, r4, 24
90; P8-NEXT:    lxvdsx vs0, 0, r4
91; P8-NEXT:    stxvd2x vs0, 0, r3
92; P8-NEXT:    blr
93entry:
94  %arrayidx = getelementptr inbounds i64, i64* %a, i64 3
95  %0 = load i64, i64* %arrayidx, align 8
  ; Insert the scalar into lane 0, then use an all-zero shuffle mask so every
  ; result lane takes lane 0 (the splat idiom).
96  %splat.splatinsert.i = insertelement <2 x i64> undef, i64 %0, i32 0
97  %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
98  store <2 x i64> %splat.splat.i, <2 x i64>* %c, align 16
99  ret void
100}
101
; unadjusted_lxvwsx: reinterpret %s as a <4 x i8> pointer, load those four
; bytes, and return a <16 x i8> in which bytes 0..3 are repeated four times.
; %t is not used by the body.
; The P9 checks expect a single lxvwsx from r3 with no preceding addi; the P8
; checks expect lfiwzx followed by xxspltw.
102define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
103; P9-LABEL: unadjusted_lxvwsx:
104; P9:       # %bb.0: # %entry
105; P9-NEXT:    lxvwsx v2, 0, r3
106; P9-NEXT:    blr
107;
108; P8-LABEL: unadjusted_lxvwsx:
109; P8:       # %bb.0: # %entry
110; P8-NEXT:    lfiwzx f0, 0, r3
111; P8-NEXT:    xxspltw v2, vs0, 1
112; P8-NEXT:    blr
113  entry:
114    %0 = bitcast i32* %s to <4 x i8>*
115    %1 = load <4 x i8>, <4 x i8>* %0, align 4
    ; The mask repeats source byte indices 0,1,2,3 four times.
116    %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
117    ret <16 x i8> %2
118}
119
; adjusted_lxvwsx: reinterpret %s as an <8 x i8> pointer, load those eight
; bytes, and return a <16 x i8> in which bytes 4..7 of the load are repeated
; four times. %t is not used by the body.
; The P9 checks expect the address to be advanced by 4 (addi) before lxvwsx.
; The P8 checks expect a 64-bit ld, a move to a VSX register (mtfprd), and
; xxspltw with word index 0.
120define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
121; P9-LABEL: adjusted_lxvwsx:
122; P9:       # %bb.0: # %entry
123; P9-NEXT:    addi r3, r3, 4
124; P9-NEXT:    lxvwsx v2, 0, r3
125; P9-NEXT:    blr
126;
127; P8-LABEL: adjusted_lxvwsx:
128; P8:       # %bb.0: # %entry
129; P8-NEXT:    ld r3, 0(r3)
130; P8-NEXT:    mtfprd f0, r3
131; P8-NEXT:    xxspltw v2, vs0, 0
132; P8-NEXT:    blr
133  entry:
134    %0 = bitcast i64* %s to <8 x i8>*
135    %1 = load <8 x i8>, <8 x i8>* %0, align 8
    ; The mask repeats source byte indices 4,5,6,7 four times.
136    %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
137    ret <16 x i8> %2
138}
139
; unadjusted_lxvwsx_v16i8: load a full <16 x i8> from %s and return a vector in
; which bytes 0..3 of the load are repeated four times. %t is not used by the
; body.
; The P9 checks expect a single lxvwsx from r3 with no preceding addi; the P8
; checks expect a full lvx followed by xxspltw with word index 3.
140define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
141; P9-LABEL: unadjusted_lxvwsx_v16i8:
142; P9:       # %bb.0: # %entry
143; P9-NEXT:    lxvwsx v2, 0, r3
144; P9-NEXT:    blr
145;
146; P8-LABEL: unadjusted_lxvwsx_v16i8:
147; P8:       # %bb.0: # %entry
148; P8-NEXT:    lvx v2, 0, r3
149; P8-NEXT:    xxspltw v2, v2, 3
150; P8-NEXT:    blr
151  entry:
152    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; The mask repeats source byte indices 0,1,2,3 four times.
153    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
154    ret <16 x i8> %1
155}
156
; adjusted_lxvwsx_v16i8: load a full <16 x i8> from %s and return a vector in
; which bytes 4..7 of the load are repeated four times. %t is not used by the
; body.
; The P9 checks expect the address to be advanced by 4 (addi) before lxvwsx;
; the P8 checks expect a full lvx followed by xxspltw with word index 2.
157define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
158; P9-LABEL: adjusted_lxvwsx_v16i8:
159; P9:       # %bb.0: # %entry
160; P9-NEXT:    addi r3, r3, 4
161; P9-NEXT:    lxvwsx v2, 0, r3
162; P9-NEXT:    blr
163;
164; P8-LABEL: adjusted_lxvwsx_v16i8:
165; P8:       # %bb.0: # %entry
166; P8-NEXT:    lvx v2, 0, r3
167; P8-NEXT:    xxspltw v2, v2, 2
168; P8-NEXT:    blr
169  entry:
170    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; The mask repeats source byte indices 4,5,6,7 four times.
171    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
172    ret <16 x i8> %1
173}
174
; adjusted_lxvwsx_v16i8_2: load a full <16 x i8> from %s and return a vector in
; which bytes 8..11 of the load are repeated four times. %t is not used by the
; body.
; The P9 checks expect the address to be advanced by 8 (addi) before lxvwsx;
; the P8 checks expect a full lvx followed by xxspltw with word index 1.
175define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
176; P9-LABEL: adjusted_lxvwsx_v16i8_2:
177; P9:       # %bb.0: # %entry
178; P9-NEXT:    addi r3, r3, 8
179; P9-NEXT:    lxvwsx v2, 0, r3
180; P9-NEXT:    blr
181;
182; P8-LABEL: adjusted_lxvwsx_v16i8_2:
183; P8:       # %bb.0: # %entry
184; P8-NEXT:    lvx v2, 0, r3
185; P8-NEXT:    xxspltw v2, v2, 1
186; P8-NEXT:    blr
187  entry:
188    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; The mask repeats source byte indices 8,9,10,11 four times.
189    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
190    ret <16 x i8> %1
191}
192
; adjusted_lxvwsx_v16i8_3: load a full <16 x i8> from %s and return a vector in
; which bytes 12..15 of the load are repeated four times. %t is not used by the
; body.
; The P9 checks expect the address to be advanced by 12 (addi) before lxvwsx;
; the P8 checks expect a full lvx followed by xxspltw with word index 0.
193define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
194; P9-LABEL: adjusted_lxvwsx_v16i8_3:
195; P9:       # %bb.0: # %entry
196; P9-NEXT:    addi r3, r3, 12
197; P9-NEXT:    lxvwsx v2, 0, r3
198; P9-NEXT:    blr
199;
200; P8-LABEL: adjusted_lxvwsx_v16i8_3:
201; P8:       # %bb.0: # %entry
202; P8-NEXT:    lvx v2, 0, r3
203; P8-NEXT:    xxspltw v2, v2, 0
204; P8-NEXT:    blr
205  entry:
206    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; The mask repeats source byte indices 12,13,14,15 four times.
207    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
208    ret <16 x i8> %1
209}
210
; unadjusted_lxvdsx: reinterpret %s as an <8 x i8> pointer, load those eight
; bytes, and return a <16 x i8> in which bytes 0..7 are repeated twice.
; %t is not used by the body.
; Both the P9 and P8 checks expect a single lxvdsx from r3 with no preceding
; addi.
211define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
212; P9-LABEL: unadjusted_lxvdsx:
213; P9:       # %bb.0: # %entry
214; P9-NEXT:    lxvdsx v2, 0, r3
215; P9-NEXT:    blr
216;
217; P8-LABEL: unadjusted_lxvdsx:
218; P8:       # %bb.0: # %entry
219; P8-NEXT:    lxvdsx v2, 0, r3
220; P8-NEXT:    blr
221  entry:
222    %0 = bitcast i64* %s to <8 x i8>*
223    %1 = load <8 x i8>, <8 x i8>* %0, align 8
    ; The mask repeats source byte indices 0..7 twice.
224    %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
225    ret <16 x i8> %2
226}
227
; unadjusted_lxvdsx_v16i8: load a full <16 x i8> from %s and return a vector in
; which bytes 0..7 of the load are repeated twice. %t is not used by the body.
; Both the P9 and P8 checks expect a single lxvdsx from r3 with no preceding
; addi.
228define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
229; P9-LABEL: unadjusted_lxvdsx_v16i8:
230; P9:       # %bb.0: # %entry
231; P9-NEXT:    lxvdsx v2, 0, r3
232; P9-NEXT:    blr
233;
234; P8-LABEL: unadjusted_lxvdsx_v16i8:
235; P8:       # %bb.0: # %entry
236; P8-NEXT:    lxvdsx v2, 0, r3
237; P8-NEXT:    blr
238  entry:
239    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; The mask repeats source byte indices 0..7 twice.
240    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
241    ret <16 x i8> %1
242}
243
; adjusted_lxvdsx_v16i8: load a full <16 x i8> from %s and return a vector in
; which bytes 8..15 of the load are repeated twice. %t is not used by the body.
; Both the P9 and P8 checks expect the address to be advanced by 8 (addi)
; before a single lxvdsx.
244define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
245; P9-LABEL: adjusted_lxvdsx_v16i8:
246; P9:       # %bb.0: # %entry
247; P9-NEXT:    addi r3, r3, 8
248; P9-NEXT:    lxvdsx v2, 0, r3
249; P9-NEXT:    blr
250;
251; P8-LABEL: adjusted_lxvdsx_v16i8:
252; P8:       # %bb.0: # %entry
253; P8-NEXT:    addi r3, r3, 8
254; P8-NEXT:    lxvdsx v2, 0, r3
255; P8-NEXT:    blr
256  entry:
257    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; The mask repeats source byte indices 8..15 twice.
258    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
259    ret <16 x i8> %1
260}
261