; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s

@var8 = global i8 0
@var16 = global i16 0
@var32 = global i32 0
@var64 = global i64 0

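; This file checks that add/sub with a zero- or sign-extended i8/i16/i32 RHS is
; selected as the extended-register form (uxtb/uxth/uxtw, sxtb/sxth/sxtw),
; optionally folding in the left shifts (#1-#4) used below. The *_fold_uxtw
; functions then check that no explicit uxtw is emitted when a write to a
; W register has already zero-extended the value.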
define void @addsub_i8rhs() minsize {
; CHECK-LABEL: addsub_i8rhs:
    %val8_tmp = load i8, i8* @var8
    %lhs32 = load i32, i32* @var32
    %lhs64 = load i64, i64* @var64

    ; Need this to prevent extension upon load and give a vanilla i8 operand.
    %val8 = add i8 %val8_tmp, 123


; Zero-extending to 32 bits
    %rhs32_zext = zext i8 %val8 to i32
    %res32_zext = add i32 %lhs32, %rhs32_zext
    store volatile i32 %res32_zext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb

    %rhs32_zext_shift = shl i32 %rhs32_zext, 3
    %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
    store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3


; Zero-extending to 64 bits
    %rhs64_zext = zext i8 %val8 to i64
    %res64_zext = add i64 %lhs64, %rhs64_zext
    store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb

    %rhs64_zext_shift = shl i64 %rhs64_zext, 1
    %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1

; Sign-extending to 32 bits
    %rhs32_sext = sext i8 %val8 to i32
    %res32_sext = add i32 %lhs32, %rhs32_sext
    store volatile i32 %res32_sext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb

    %rhs32_sext_shift = shl i32 %rhs32_sext, 1
    %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
    store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1

; Sign-extending to 64 bits
    %rhs64_sext = sext i8 %val8 to i64
    %res64_sext = add i64 %lhs64, %rhs64_sext
    store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb

    %rhs64_sext_shift = shl i64 %rhs64_sext, 4
    %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
    store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4


; CMP variants
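; cmp with an extended register is an alias of subs with the zero register as
; destination, so the same uxt/sxt operand forms apply.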
    %tst = icmp slt i32 %lhs32, %rhs32_zext
    br i1 %tst, label %end, label %test2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb

test2:
    %cmp_sext = sext i8 %val8 to i64
    %tst2 = icmp eq i64 %lhs64, %cmp_sext
    br i1 %tst2, label %other, label %end
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb

other:
    store volatile i32 %lhs32, i32* @var32
    ret void

end:
    ret void
}

define void @sub_i8rhs() minsize {
; CHECK-LABEL: sub_i8rhs:
    %val8_tmp = load i8, i8* @var8
    %lhs32 = load i32, i32* @var32
    %lhs64 = load i64, i64* @var64

    ; Need this to prevent extension upon load and give a vanilla i8 operand.
    %val8 = add i8 %val8_tmp, 123


; Zero-extending to 32 bits
    %rhs32_zext = zext i8 %val8 to i32
    %res32_zext = sub i32 %lhs32, %rhs32_zext
    store volatile i32 %res32_zext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb

    %rhs32_zext_shift = shl i32 %rhs32_zext, 3
    %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
    store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3


; Zero-extending to 64 bits
    %rhs64_zext = zext i8 %val8 to i64
    %res64_zext = sub i64 %lhs64, %rhs64_zext
    store volatile i64 %res64_zext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb

    %rhs64_zext_shift = shl i64 %rhs64_zext, 1
    %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1

; Sign-extending to 32 bits
    %rhs32_sext = sext i8 %val8 to i32
    %res32_sext = sub i32 %lhs32, %rhs32_sext
    store volatile i32 %res32_sext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb

    %rhs32_sext_shift = shl i32 %rhs32_sext, 1
    %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
    store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1

; Sign-extending to 64 bits
    %rhs64_sext = sext i8 %val8 to i64
    %res64_sext = sub i64 %lhs64, %rhs64_sext
    store volatile i64 %res64_sext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb

    %rhs64_sext_shift = shl i64 %rhs64_sext, 4
    %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
    store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4

    ret void
}

define void @addsub_i16rhs() minsize {
; CHECK-LABEL: addsub_i16rhs:
    %val16_tmp = load i16, i16* @var16
    %lhs32 = load i32, i32* @var32
    %lhs64 = load i64, i64* @var64

    ; Need this to prevent extension upon load and give a vanilla i16 operand.
    %val16 = add i16 %val16_tmp, 123


; Zero-extending to 32 bits
    %rhs32_zext = zext i16 %val16 to i32
    %res32_zext = add i32 %lhs32, %rhs32_zext
    store volatile i32 %res32_zext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth

    %rhs32_zext_shift = shl i32 %rhs32_zext, 3
    %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
    store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3


; Zero-extending to 64 bits
    %rhs64_zext = zext i16 %val16 to i64
    %res64_zext = add i64 %lhs64, %rhs64_zext
    store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth

    %rhs64_zext_shift = shl i64 %rhs64_zext, 1
    %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1

; Sign-extending to 32 bits
    %rhs32_sext = sext i16 %val16 to i32
    %res32_sext = add i32 %lhs32, %rhs32_sext
    store volatile i32 %res32_sext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth

    %rhs32_sext_shift = shl i32 %rhs32_sext, 1
    %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
    store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1

; Sign-extending to 64 bits
    %rhs64_sext = sext i16 %val16 to i64
    %res64_sext = add i64 %lhs64, %rhs64_sext
    store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth

    %rhs64_sext_shift = shl i64 %rhs64_sext, 4
    %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
    store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4


; CMP variants
    %tst = icmp slt i32 %lhs32, %rhs32_zext
    br i1 %tst, label %end, label %test2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth

test2:
    %cmp_sext = sext i16 %val16 to i64
    %tst2 = icmp eq i64 %lhs64, %cmp_sext
    br i1 %tst2, label %other, label %end
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth

other:
    store volatile i32 %lhs32, i32* @var32
    ret void

end:
    ret void
}

define void @sub_i16rhs() minsize {
; CHECK-LABEL: sub_i16rhs:
    %val16_tmp = load i16, i16* @var16
    %lhs32 = load i32, i32* @var32
    %lhs64 = load i64, i64* @var64

    ; Need this to prevent extension upon load and give a vanilla i16 operand.
    %val16 = add i16 %val16_tmp, 123


; Zero-extending to 32 bits
    %rhs32_zext = zext i16 %val16 to i32
    %res32_zext = sub i32 %lhs32, %rhs32_zext
    store volatile i32 %res32_zext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth

    %rhs32_zext_shift = shl i32 %rhs32_zext, 3
    %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
    store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3


; Zero-extending to 64 bits
    %rhs64_zext = zext i16 %val16 to i64
    %res64_zext = sub i64 %lhs64, %rhs64_zext
    store volatile i64 %res64_zext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth

    %rhs64_zext_shift = shl i64 %rhs64_zext, 1
    %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1

; Sign-extending to 32 bits
    %rhs32_sext = sext i16 %val16 to i32
    %res32_sext = sub i32 %lhs32, %rhs32_sext
    store volatile i32 %res32_sext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth

    %rhs32_sext_shift = shl i32 %rhs32_sext, 1
    %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
    store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1

; Sign-extending to 64 bits
    %rhs64_sext = sext i16 %val16 to i64
    %res64_sext = sub i64 %lhs64, %rhs64_sext
    store volatile i64 %res64_sext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth

    %rhs64_sext_shift = shl i64 %rhs64_sext, 4
    %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
    store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4

    ret void
}

; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
; example), but the remaining instructions are probably not idiomatic
; in the face of "add/sub (shifted register)" so I don't intend to.
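; The unshifted zext case uses the incoming argument %in32: the upper 32 bits
; of an i32 argument register are not guaranteed to be zero, so the explicit
; uxtw form is still expected there.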
define void @addsub_i32rhs(i32 %in32) minsize {
; CHECK-LABEL: addsub_i32rhs:
    %val32_tmp = load i32, i32* @var32
    %lhs64 = load i64, i64* @var64

    %val32 = add i32 %val32_tmp, 123

    %rhs64_zext = zext i32 %in32 to i64
    %res64_zext = add i64 %lhs64, %rhs64_zext
    store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw

    %rhs64_zext2 = zext i32 %val32 to i64
    %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
    %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2

    %rhs64_sext = sext i32 %val32 to i64
    %res64_sext = add i64 %lhs64, %rhs64_sext
    store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw

    %rhs64_sext_shift = shl i64 %rhs64_sext, 2
    %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
    store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2

    ret void
}

define void @sub_i32rhs(i32 %in32) minsize {
; CHECK-LABEL: sub_i32rhs:
    %val32_tmp = load i32, i32* @var32
    %lhs64 = load i64, i64* @var64

    %val32 = add i32 %val32_tmp, 123

    %rhs64_zext = zext i32 %in32 to i64
    %res64_zext = sub i64 %lhs64, %rhs64_zext
    store volatile i64 %res64_zext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw

    %rhs64_zext2 = zext i32 %val32 to i64
    %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
    %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2

    %rhs64_sext = sext i32 %val32 to i64
    %res64_sext = sub i64 %lhs64, %rhs64_sext
    store volatile i64 %res64_sext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw

    %rhs64_sext_shift = shl i64 %rhs64_sext, 2
    %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
    store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2

    ret void
}

; Check that implicit zext from w reg write is used instead of uxtw form of add.
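; A write to a W register implicitly zeroes the upper 32 bits of the
; corresponding X register, so the explicit uxtw operand is redundant here.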
define i64 @add_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: add_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: add x0, x1, x[[TMP]]
  %ret = add i64 %y, %ext
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of sub and that mov WZR is folded to form a neg instruction.
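; (neg xd, xm is an alias of sub xd, xzr, xm, so the zero LHS folds away.)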
define i64 @sub_fold_uxtw_xzr(i32 %x) {
; CHECK-LABEL: sub_fold_uxtw_xzr:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: neg x0, x[[TMP]]
  %ret = sub i64 0, %ext
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw form of subs/cmp.
define i1 @cmp_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: cmp_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: cmp x1, x[[TMP]]
; CHECK-NEXT: cset
  %ret = icmp eq i64 %y, %ext
  ret i1 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of add, leading to madd selection.
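; (madd xd, xn, xm, xa computes xa + xn*xm, so the multiply and the add fold
; into a single instruction.)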
define i64 @madd_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: madd_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: madd x0, x1, x1, x[[TMP]]
  %mul = mul i64 %y, %y
  %ret = add i64 %mul, %ext
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of sub, leading to sub/cmp folding.
define i1 @cmp_sub_fold_uxtw(i32 %x, i64 %y, i64 %z) {
; CHECK-LABEL: cmp_sub_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: cmp x[[TMP2:[0-9]+]], x[[TMP]]
; CHECK-NEXT: cset
  %sub = sub i64 %z, %ext
  %ret = icmp eq i64 %sub, 0
  ret i1 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of add, and that an add of -1 gets selected as a sub.
define i64 @add_imm_fold_uxtw(i32 %x) {
; CHECK-LABEL: add_imm_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: sub x0, x[[TMP]], #1
  %ret = add i64 %ext, -1
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of add, and that the add lsl form gets selected.
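; (The shl of the other operand folds into the add's shifted-register form,
; checked as lsl #3 below.)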
define i64 @add_lsl_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: add_lsl_fold_uxtw:
entry:
; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3
  %m = or i32 %x, 3
  %ext = zext i32 %m to i64
  %shift = shl i64 %y, 3
; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3
  %ret = add i64 %ext, %shift
  ret i64 %ret
}