; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX

; Zero-initialized, 16-byte-aligned operands for the call_* functions:
; @x and @y are loaded as <1 x i128> vectors, @a and @b as scalar i128 values.
@x = common global <1 x i128> zeroinitializer, align 16
@y = common global <1 x i128> zeroinitializer, align 16
@a = common global i128 zeroinitializer, align 16
@b = common global i128 zeroinitializer, align 16

; VSX:
;   %a is passed in VSX register 34 (the same register the vector add names
;   as 2).
;   The constant 1 is loaded from the TOC.
;   On LE, ensure the loaded constant is swapped before being used (using
;   xxswapd).
; VMX (no VSX):
;   %a is passed in register 2.
;   The constant 1 is loaded from the TOC.
;   No swaps are necessary when using P8 Vector instructions on LE.
define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
  ; Vector add of the constant 1 to the single i128 lane of %a.
  %tmp = add <1 x i128> %a, <i128 1>
  ret <1 x i128> %tmp

; FIXME: Seems a 128-bit literal is materialized by loading from the TOC. There
;        should be a better way of doing this.

; LE with VSX: the constant is loaded with lxvd2x and swapped into register 35
; before the quadword add.
; CHECK-LE-LABEL: @v1i128_increment_by_one
; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE: xxswapd 35, [[VAL]]
; CHECK-LE: vadduqm 2, 2, 3
; CHECK-LE: blr

; BE with VSX: the constant is loaded directly into register 35; no swap may
; appear between the load and the add, nor on the result afterwards.
; CHECK-BE-LABEL: @v1i128_increment_by_one
; CHECK-BE: lxvd2x 35, {{[0-9]+}}, {{[0-9]+}}
; CHECK-BE-NOT: xxswapd
; CHECK-BE: vadduqm 2, 2, 3
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-BE: blr

; Without VSX (this prefix is shared by the BE and LE -vsx runs): the constant
; is loaded with lvx, and no VSX load, store or swap may be emitted.
; CHECK-NOVSX-LABEL: @v1i128_increment_by_one
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: stxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX: lvx [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: lxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX: vadduqm 2, 2, [[VAL]]
; CHECK-NOVSX: blr
}

; VSX:
;   %a is passed in register 34
;   %b is passed in register 35
;   No swaps are necessary when using P8 Vector instructions on LE
; VMX (no VSX):
;   %a is passed in register 2
;   %b is passed in register 3
;   On LE, there is no need to swap the contents of 2 and 3 because the
;   lvx/stvx instructions do not swap elements
define <1 x i128> @v1i128_increment_by_val(<1 x i128> %a, <1 x i128> %b) nounwind {
  ; Vector add of the two <1 x i128> arguments.
  %tmp = add <1 x i128> %a, %b
  ret <1 x i128> %tmp

; Both operands arrive in registers, so every configuration expects a single
; quadword add on registers 2 and 3 with no swap around it.
; CHECK-LE-LABEL: @v1i128_increment_by_val
; CHECK-LE-NOT: xxswapd
; CHECK-LE: vadduqm 2, 2, 3
; CHECK-LE: blr

; The swap patterns use an inline register regex rather than a FileCheck
; variable, because no variable is defined within this function's checks.
; CHECK-BE-LABEL: @v1i128_increment_by_val
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 35
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-BE: vadduqm 2, 2, 3
; CHECK-BE: blr

; CHECK-NOVSX-LABEL: @v1i128_increment_by_val
; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-NOVSX: vadduqm 2, 2, 3
; CHECK-NOVSX: blr
}

; Little Endian (VSX and VMX):
;   Lower 64-bits of %a are passed in register 3
;   Upper 64-bits of %a are passed in register 4
;   Increment lower 64-bits using addic (immediate value of 1)
;   Propagate the carry into the upper 64-bits using addze (add to zero
;   extended)
;   Results are placed in registers 3 and 4
; Big Endian (VSX and VMX):
;   Lower 64-bits of %a are passed in register 4
;   Upper 64-bits of %a are passed in register 3
;   Increment lower 64-bits using addic (immediate value of 1)
;   Propagate the carry into the upper 64-bits using addze (add to zero
;   extended)
;   Results are placed in registers 3 and 4
define i128 @i128_increment_by_one(i128 %a) nounwind {
  ; Scalar i128 add of the constant 1.
  %tmp = add i128 %a, 1
  ret i128 %tmp

; LE: the low doubleword is in register 3, so addic targets 3 and the carry is
; folded into register 4 by the immediately following addze.
; CHECK-LE-LABEL: @i128_increment_by_one
; CHECK-LE: addic 3, 3, 1
; CHECK-LE-NEXT: addze 4, 4
; CHECK-LE: blr

; BE: the register roles are mirrored (low doubleword in 4, high in 3).
; CHECK-BE-LABEL: @i128_increment_by_one
; CHECK-BE: addic 4, 4, 1
; CHECK-BE-NEXT: addze 3, 3
; CHECK-BE: blr

; The -vsx runs expect the same sequences as their VSX counterparts.
; CHECK-LE-NOVSX-LABEL: @i128_increment_by_one
; CHECK-LE-NOVSX: addic 3, 3, 1
; CHECK-LE-NOVSX-NEXT: addze 4, 4
; CHECK-LE-NOVSX: blr

; CHECK-BE-NOVSX-LABEL: @i128_increment_by_one
; CHECK-BE-NOVSX: addic 4, 4, 1
; CHECK-BE-NOVSX-NEXT: addze 3, 3
; CHECK-BE-NOVSX: blr
}

; Little Endian (VSX and VMX):
;   Lower 64-bits of %a are passed in register 3
;   Upper 64-bits of %a are passed in register 4
;   Lower 64-bits of %b are passed in register 5
;   Upper 64-bits of %b are passed in register 6
;   Add the lower 64-bits using addc on registers 3 and 5
;   Add the upper 64-bits (plus carry) using adde on registers 4 and 6
;   Registers 3 and 4 should hold the result
; Big Endian (VSX and VMX):
;   Upper 64-bits of %a are passed in register 3
;   Lower 64-bits of %a are passed in register 4
;   Upper 64-bits of %b are passed in register 5
;   Lower 64-bits of %b are passed in register 6
;   Add the lower 64-bits using addc on registers 4 and 6
;   Add the upper 64-bits (plus carry) using adde on registers 3 and 5
;   Registers 3 and 4 should hold the result
define i128 @i128_increment_by_val(i128 %a, i128 %b) nounwind {
  ; Scalar i128 add of the two arguments.
  %tmp = add i128 %a, %b
  ret i128 %tmp

; LE: low doublewords are in registers 3 and 5, high doublewords in 4 and 6;
; addc on the low halves is immediately followed by adde on the high halves.
; CHECK-LE-LABEL: @i128_increment_by_val
; CHECK-LE: addc 3, 3, 5
; CHECK-LE-NEXT: adde 4, 4, 6
; CHECK-LE: blr

; BE: the register roles are mirrored (low in 4 and 6, high in 3 and 5).
; CHECK-BE-LABEL: @i128_increment_by_val
; CHECK-BE: addc 4, 4, 6
; CHECK-BE-NEXT: adde 3, 3, 5
; CHECK-BE: blr

; The -vsx runs expect the same sequences as their VSX counterparts.
; CHECK-LE-NOVSX-LABEL: @i128_increment_by_val
; CHECK-LE-NOVSX: addc 3, 3, 5
; CHECK-LE-NOVSX-NEXT: adde 4, 4, 6
; CHECK-LE-NOVSX: blr

; CHECK-BE-NOVSX-LABEL: @i128_increment_by_val
; CHECK-BE-NOVSX: addc 4, 4, 6
; CHECK-BE-NOVSX-NEXT: adde 3, 3, 5
; CHECK-BE-NOVSX: blr
}

; Call sites for the routines defined above.
; Ensure the parameters are loaded in the same order that is expected by the
; callee. See comments for individual functions above for details on registers
; used for parameters.
define <1 x i128> @call_v1i128_increment_by_one() nounwind {
  ; Load @x and pass it as the single vector argument.
  %tmp = load <1 x i128>, <1 x i128>* @x, align 16
  %ret = call <1 x i128> @v1i128_increment_by_one(<1 x i128> %tmp)
  ret <1 x i128> %ret

; LE with VSX: the argument is loaded with lxvd2x and swapped into register 34
; before the call.
; CHECK-LE-LABEL: @call_v1i128_increment_by_one
; CHECK-LE: lxvd2x [[PARAM:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE: xxswapd 34, [[PARAM]]
; CHECK-LE: bl v1i128_increment_by_one
; CHECK-LE: blr

; BE with VSX: the argument is loaded directly into register 34, with no swap
; into 34 before the call.
; CHECK-BE-LABEL: @call_v1i128_increment_by_one
; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}}
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-BE: bl v1i128_increment_by_one
; CHECK-BE: blr

; Without VSX: the argument is loaded into register 2 with lvx, with no swap
; before the call.
; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_one
; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX: bl v1i128_increment_by_one
; CHECK-NOVSX: blr
}

define <1 x i128> @call_v1i128_increment_by_val() nounwind {
  ; Load @x and @y and pass them as the first and second vector arguments.
  %tmp = load <1 x i128>, <1 x i128>* @x, align 16
  %tmp2 = load <1 x i128>, <1 x i128>* @y, align 16
  %ret = call <1 x i128> @v1i128_increment_by_val(<1 x i128> %tmp, <1 x i128> %tmp2)
  ret <1 x i128> %ret

; LE with VSX: both arguments are loaded with lxvd2x and swapped into
; registers 34 and 35 (in either order) before the call.
; CHECK-LE-LABEL: @call_v1i128_increment_by_val
; CHECK-LE: lxvd2x [[PARAM1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE: lxvd2x [[PARAM2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE-DAG: xxswapd 34, [[PARAM1]]
; CHECK-LE-DAG: xxswapd 35, [[PARAM2]]
; CHECK-LE: bl v1i128_increment_by_val
; CHECK-LE: blr

; BE with VSX: no swap into 34 or 35 may precede the call.
; NOTE(review): only the load into register 35 is checked for this
; configuration; the load feeding register 34 is not. Confirm whether that
; omission is intentional.
; CHECK-BE-LABEL: @call_v1i128_increment_by_val
; CHECK-BE-DAG: lxvw4x 35, {{[0-9]+}}, {{[0-9]+}}
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-BE-NOT: xxswapd 35, {{[0-9]+}}
; CHECK-BE: bl v1i128_increment_by_val
; CHECK-BE: blr

; Without VSX: both arguments are loaded with lvx into registers 2 and 3 (in
; either order), with no swap into 34 or 35 before the call.
; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_val
; CHECK-NOVSX-DAG: lvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-DAG: lvx 3, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-NOVSX-NOT: xxswapd 35, {{[0-9]+}}
; CHECK-NOVSX: bl v1i128_increment_by_val
; CHECK-NOVSX: blr
}

define i128 @call_i128_increment_by_one() nounwind {
  ; Load @a and pass it as the single scalar i128 argument.
  %tmp = load i128, i128* @a, align 16
  %ret = call i128 @i128_increment_by_one(i128 %tmp)
  ret i128 %ret

; In every configuration the doubleword at offset 0 is loaded into register 3
; and the doubleword at offset 8 into register 4 (in either order), both from
; the same base register, before the call.
; CHECK-LE-LABEL: @call_i128_increment_by_one
; CHECK-LE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
; CHECK-LE-DAG: ld 4, 8([[BASEREG]])
; CHECK-LE: bl i128_increment_by_one
; CHECK-LE: blr

; CHECK-BE-LABEL: @call_i128_increment_by_one
; CHECK-BE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
; CHECK-BE-DAG: ld 4, 8([[BASEREG]])
; CHECK-BE: bl i128_increment_by_one
; CHECK-BE: blr

; CHECK-NOVSX-LABEL: @call_i128_increment_by_one
; CHECK-NOVSX-DAG: ld 3, 0([[BASEREG:[0-9]+]])
; CHECK-NOVSX-DAG: ld 4, 8([[BASEREG]])
; CHECK-NOVSX: bl i128_increment_by_one
; CHECK-NOVSX: blr
}

define i128 @call_i128_increment_by_val() nounwind {
  ; Load @a and @b and pass them as the first and second scalar i128 arguments.
  %tmp = load i128, i128* @a, align 16
  %tmp2 = load i128, i128* @b, align 16
  %ret = call i128 @i128_increment_by_val(i128 %tmp, i128 %tmp2)
  ret i128 %ret

; In every configuration the first argument is loaded into registers 3 and 4
; from offsets 0 and 8 of one base register, and the second into registers 5
; and 6 from offsets 0 and 8 of another, in any order, before the call.
; CHECK-LE-LABEL: @call_i128_increment_by_val
; CHECK-LE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
; CHECK-LE-DAG: ld 4, 8([[P1BASEREG]])
; CHECK-LE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
; CHECK-LE-DAG: ld 6, 8([[P2BASEREG]])
; CHECK-LE: bl i128_increment_by_val
; CHECK-LE: blr

; CHECK-BE-LABEL: @call_i128_increment_by_val
; CHECK-BE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
; CHECK-BE-DAG: ld 4, 8([[P1BASEREG]])
; CHECK-BE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
; CHECK-BE-DAG: ld 6, 8([[P2BASEREG]])
; CHECK-BE: bl i128_increment_by_val
; CHECK-BE: blr

; CHECK-NOVSX-LABEL: @call_i128_increment_by_val
; CHECK-NOVSX-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
; CHECK-NOVSX-DAG: ld 4, 8([[P1BASEREG]])
; CHECK-NOVSX-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
; CHECK-NOVSX-DAG: ld 6, 8([[P2BASEREG]])
; CHECK-NOVSX: bl i128_increment_by_val
; CHECK-NOVSX: blr
}