1; RUN: opt -codegenprepare -mtriple=arm64-apple-ios -S -o - %s | FileCheck --check-prefix=OPT %s
2; RUN: llc < %s -march=arm64 | FileCheck %s
; Aggregate types used as pointer operands by @foo (%struct.X, %struct.Y)
; and by @fct1 (%struct.Z, %struct.A).
3%struct.X = type { i8, i8, [2 x i8] }
4%struct.Y = type { i32, i8 }
5%struct.Z = type { i8, i8, [2 x i8], i16 }
6%struct.A = type { i64, i8 }
7
; Loads an i32 through %x, isolates bit 3 (lshr by 3, then and with 1),
; truncates it to i8 and stores it into field 1 of %y. The expected output
; performs the extraction with ubfx and has no separate "and" before the ret.
8define void @foo(%struct.X* nocapture %x, %struct.Y* nocapture %y) nounwind optsize ssp {
9; CHECK-LABEL: foo:
10; CHECK: ubfx
11; CHECK-NOT: and
12; CHECK: ret
13
14  %tmp = bitcast %struct.X* %x to i32*
15  %tmp1 = load i32, i32* %tmp, align 4
16  %b = getelementptr inbounds %struct.Y, %struct.Y* %y, i64 0, i32 1
17  %bf.clear = lshr i32 %tmp1, 3
18  %bf.clear.lobit = and i32 %bf.clear, 1
19  %frombool = trunc i32 %bf.clear.lobit to i8
20  store i8 %frombool, i8* %b, align 1
21  ret void
22}
23
; Truncates the argument to i32, then shifts left by 28 and arithmetically
; right by 28, i.e. sign-extends the low 4 bits. Expected to be selected as a
; single sbfx with lsb 0 and width 4.
24define i32 @baz(i64 %cav1.coerce) nounwind {
25; CHECK-LABEL: baz:
26; CHECK: sbfx  w0, w0, #0, #4
27  %tmp = trunc i64 %cav1.coerce to i32
28  %tmp1 = shl i32 %tmp, 28
29  %bf.val.sext = ashr exact i32 %tmp1, 28
30  ret i32 %bf.val.sext
31}
32
; Same idea as @baz but with different shift amounts: shl by 22 followed by
; ashr by 26 selects a signed 6-bit field (32 - 26) starting at bit 4
; (26 - 22). Expected to be a single sbfx with lsb 4 and width 6.
33define i32 @bar(i64 %cav1.coerce) nounwind {
34; CHECK-LABEL: bar:
35; CHECK: sbfx  w0, w0, #4, #6
36  %tmp = trunc i64 %cav1.coerce to i32
37  %cav1.sroa.0.1.insert = shl i32 %tmp, 22
38  %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26
39  ret i32 %tmp1
40}
41
; i64 counterpart of @foo: loads an i64 through %x, isolates bit 3 and stores
; the result as an i64 into field 0 of %y. The expected output uses ubfx and
; has no separate "and" before the ret.
42define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind optsize ssp {
43; CHECK-LABEL: fct1:
44; CHECK: ubfx
45; CHECK-NOT: and
46; CHECK: ret
47
48  %tmp = bitcast %struct.Z* %x to i64*
49  %tmp1 = load i64, i64* %tmp, align 4
50  %b = getelementptr inbounds %struct.A, %struct.A* %y, i64 0, i32 0
51  %bf.clear = lshr i64 %tmp1, 3
52  %bf.clear.lobit = and i64 %bf.clear, 1
53  store i64 %bf.clear.lobit, i64* %b, align 8
54  ret void
55}
56
; i64 counterpart of @baz: shl by 28 followed by ashr by 28 sign-extends the
; low 36 bits (64 - 28). Expected to be a single sbfx with lsb 0 and width 36.
57define i64 @fct2(i64 %cav1.coerce) nounwind {
58; CHECK-LABEL: fct2:
59; CHECK: sbfx  x0, x0, #0, #36
60  %tmp = shl i64 %cav1.coerce, 28
61  %bf.val.sext = ashr exact i64 %tmp, 28
62  ret i64 %bf.val.sext
63}
64
; i64 counterpart of @bar: shl by 22 followed by ashr by 26 selects a signed
; 38-bit field (64 - 26) starting at bit 4 (26 - 22). Expected to be a single
; sbfx with lsb 4 and width 38.
65define i64 @fct3(i64 %cav1.coerce) nounwind {
66; CHECK-LABEL: fct3:
67; CHECK: sbfx  x0, x0, #4, #38
68  %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22
69  %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26
70  ret i64 %tmp1
71}
72
; Replaces the low 24 bits of the i64 stored at %y with bits [16, 40) of %x:
; the loaded value is masked with -16777216 (low 24 bits cleared), %x is
; shifted right by 16 and masked with 16777215 (24 low bits), and the two are
; or-ed together. Expected to be a load, one bfxil (lsb 16, width 24), a store
; and a ret, with nothing in between.
73define void @fct4(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
74entry:
75; CHECK-LABEL: fct4:
76; CHECK: ldr [[REG1:x[0-9]+]],
77; CHECK-NEXT: bfxil [[REG1]], x1, #16, #24
78; CHECK-NEXT: str [[REG1]],
79; CHECK-NEXT: ret
80  %0 = load i64, i64* %y, align 8
81  %and = and i64 %0, -16777216
82  %shr = lshr i64 %x, 16
83  %and1 = and i64 %shr, 16777215
84  %or = or i64 %and, %and1
85  store i64 %or, i64* %y, align 8
86  ret void
87}
88
; i32 variant of @fct4 with a 3-bit field: the low 3 bits of the value stored
; at %y (mask -8) are replaced by bits [16, 19) of %x (lshr 16, mask 7).
; Expected to be a load, one bfxil (lsb 16, width 3), a store and a ret.
89define void @fct5(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
90entry:
91; CHECK-LABEL: fct5:
92; CHECK: ldr [[REG1:w[0-9]+]],
93; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
94; CHECK-NEXT: str [[REG1]],
95; CHECK-NEXT: ret
96  %0 = load i32, i32* %y, align 8
97  %and = and i32 %0, -8
98  %shr = lshr i32 %x, 16
99  %and1 = and i32 %shr, 7
100  %or = or i32 %and, %and1
101  store i32 %or, i32* %y, align 8
102  ret void
103}
104
105; Check if we can still catch bfm instruction when we drop some low bits
; Same bitfield insertion as @fct5, but the merged value is shifted right by 2
; before being stored, so the two lowest bits of the result are discarded.
106define void @fct6(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
107entry:
108; CHECK-LABEL: fct6:
109; CHECK: ldr [[REG1:w[0-9]+]],
110; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
111; lsr is an alias of ubfm
112; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2
113; CHECK-NEXT: str [[REG2]],
114; CHECK-NEXT: ret
115  %0 = load i32, i32* %y, align 8
116  %and = and i32 %0, -8
117  %shr = lshr i32 %x, 16
118  %and1 = and i32 %shr, 7
119  %or = or i32 %and, %and1
120  %shr1 = lshr i32 %or, 2
121  store i32 %shr1, i32* %y, align 8
122  ret void
123}
124
125
126; Check if we can still catch bfm instruction when we drop some high bits
; Same bitfield insertion as @fct5, but the merged value is shifted left by 2
; before being stored, so the two highest bits of the result are discarded.
127define void @fct7(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
128entry:
129; CHECK-LABEL: fct7:
130; CHECK: ldr [[REG1:w[0-9]+]],
131; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
132; lsl is an alias of ubfm
133; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
134; CHECK-NEXT: str [[REG2]],
135; CHECK-NEXT: ret
136  %0 = load i32, i32* %y, align 8
137  %and = and i32 %0, -8
138  %shr = lshr i32 %x, 16
139  %and1 = and i32 %shr, 7
140  %or = or i32 %and, %and1
141  %shl = shl i32 %or, 2
142  store i32 %shl, i32* %y, align 8
143  ret void
144}
145
146
147; Check if we can still catch bfm instruction when we drop some low bits
148; (i64 version)
; Mirrors @fct6 on i64: a 3-bit field from bits [16, 19) of %x is inserted
; into the low bits of the loaded value, then the result is shifted right by 2.
149define void @fct8(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
150entry:
151; CHECK-LABEL: fct8:
152; CHECK: ldr [[REG1:x[0-9]+]],
153; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
154; lsr is an alias of ubfm
155; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2
156; CHECK-NEXT: str [[REG2]],
157; CHECK-NEXT: ret
158  %0 = load i64, i64* %y, align 8
159  %and = and i64 %0, -8
160  %shr = lshr i64 %x, 16
161  %and1 = and i64 %shr, 7
162  %or = or i64 %and, %and1
163  %shr1 = lshr i64 %or, 2
164  store i64 %shr1, i64* %y, align 8
165  ret void
166}
167
168
169; Check if we can still catch bfm instruction when we drop some high bits
170; (i64 version)
; Mirrors @fct7 on i64: a 3-bit field from bits [16, 19) of %x is inserted
; into the low bits of the loaded value, then the result is shifted left by 2.
171define void @fct9(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
172entry:
173; CHECK-LABEL: fct9:
174; CHECK: ldr [[REG1:x[0-9]+]],
175; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
176; lsl is an alias of ubfm
177; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
178; CHECK-NEXT: str [[REG2]],
179; CHECK-NEXT: ret
180  %0 = load i64, i64* %y, align 8
181  %and = and i64 %0, -8
182  %shr = lshr i64 %x, 16
183  %and1 = and i64 %shr, 7
184  %or = or i64 %and, %and1
185  %shl = shl i64 %or, 2
186  store i64 %shl, i64* %y, align 8
187  ret void
188}
189
190; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
191; (i32 version)
; The inserted field is the low 3 bits of %x taken directly (mask 7, no
; preceding shift), so the expected bfxil has lsb 0. The merged value is then
; shifted left by 2 before the store.
192define void @fct10(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
193entry:
194; CHECK-LABEL: fct10:
195; CHECK: ldr [[REG1:w[0-9]+]],
196; CHECK-NEXT: bfxil [[REG1]], w1, #0, #3
197; lsl is an alias of ubfm
198; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
199; CHECK-NEXT: str [[REG2]],
200; CHECK-NEXT: ret
201  %0 = load i32, i32* %y, align 8
202  %and = and i32 %0, -8
203  %and1 = and i32 %x, 7
204  %or = or i32 %and, %and1
205  %shl = shl i32 %or, 2
206  store i32 %shl, i32* %y, align 8
207  ret void
208}
209
210; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
211; (i64 version)
; Mirrors @fct10 on i64: the low 3 bits of %x are inserted without a preceding
; shift, so the expected bfxil has lsb 0; the result is then shifted left by 2.
212define void @fct11(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
213entry:
214; CHECK-LABEL: fct11:
215; CHECK: ldr [[REG1:x[0-9]+]],
216; CHECK-NEXT: bfxil [[REG1]], x1, #0, #3
217; lsl is an alias of ubfm
218; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
219; CHECK-NEXT: str [[REG2]],
220; CHECK-NEXT: ret
221  %0 = load i64, i64* %y, align 8
222  %and = and i64 %0, -8
223  %and1 = and i64 %x, 7
224  %or = or i64 %and, %and1
225  %shl = shl i64 %or, 2
226  store i64 %shl, i64* %y, align 8
227  ret void
228}
229
; Tests a single bit: the argument is masked with 2048 (bit 11 only) and the
; result compared against zero. Expected to become a one-bit ubfx at lsb 11
; with no "and" emitted before it.
230define zeroext i1 @fct12bis(i32 %tmp2) unnamed_addr nounwind ssp align 2 {
231; CHECK-LABEL: fct12bis:
232; CHECK-NOT: and
233; CHECK: ubfx w0, w0, #11, #1
234  %and.i.i = and i32 %tmp2, 2048
235  %tobool.i.i = icmp ne i32 %and.i.i, 0
236  ret i1 %tobool.i.i
237}
238
239; Check if we can still catch bfm instruction when we drop some high bits
240; and some low bits
; After the same insertion as @fct5, the merged value is shifted left by 2 and
; then right by 4. The net effect keeps bits [2, 30) of the merged value moved
; down to bit 0, which is why the expected ubfx has lsb 2 and width 28.
241define void @fct12(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
242entry:
243; CHECK-LABEL: fct12:
244; CHECK: ldr [[REG1:w[0-9]+]],
245; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
246; ubfx is an alias of ubfm
247; CHECK-NEXT: ubfx [[REG2:w[0-9]+]], [[REG1]], #2, #28
248; CHECK-NEXT: str [[REG2]],
249; CHECK-NEXT: ret
250  %0 = load i32, i32* %y, align 8
251  %and = and i32 %0, -8
252  %shr = lshr i32 %x, 16
253  %and1 = and i32 %shr, 7
254  %or = or i32 %and, %and1
255  %shl = shl i32 %or, 2
256  %shr2 = lshr i32 %shl, 4
257  store i32 %shr2, i32* %y, align 8
258  ret void
259}
260
261; Check if we can still catch bfm instruction when we drop some high bits
262; and some low bits
263; (i64 version)
; Mirrors @fct12 on i64: shl by 2 followed by lshr by 4 keeps bits [2, 62) of
; the merged value, hence the expected ubfx with lsb 2 and width 60.
264define void @fct13(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
265entry:
266; CHECK-LABEL: fct13:
267; CHECK: ldr [[REG1:x[0-9]+]],
268; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
269; ubfx is an alias of ubfm
270; CHECK-NEXT: ubfx [[REG2:x[0-9]+]], [[REG1]], #2, #60
271; CHECK-NEXT: str [[REG2]],
272; CHECK-NEXT: ret
273  %0 = load i64, i64* %y, align 8
274  %and = and i64 %0, -8
275  %shr = lshr i64 %x, 16
276  %and1 = and i64 %shr, 7
277  %or = or i64 %and, %and1
278  %shl = shl i64 %or, 2
279  %shr2 = lshr i64 %shl, 4
280  store i64 %shr2, i64* %y, align 8
281  ret void
282}
283
284
285; Check if we can still catch bfm instruction when we drop some high bits
286; and some low bits
; Chains two bitfield insertions: first an 8-bit field from bits [16, 24) of
; %x into the loaded value, then (after a right shift by 4) a 3-bit field from
; bits [5, 8) of %x1. The final value is shifted left by 2 before the store.
; The expected output has one bfxil per insertion, separated by the shifts.
287define void @fct14(i32* nocapture %y, i32 %x, i32 %x1) nounwind optsize inlinehint ssp {
288entry:
289; CHECK-LABEL: fct14:
290; CHECK: ldr [[REG1:w[0-9]+]],
291; CHECK-NEXT: bfxil [[REG1]], w1, #16, #8
292; lsr is an alias of ubfm
293; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #4
294; CHECK-NEXT: bfxil [[REG2]], w2, #5, #3
295; lsl is an alias of ubfm
296; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2
297; CHECK-NEXT: str [[REG3]],
298; CHECK-NEXT: ret
299  %0 = load i32, i32* %y, align 8
300  %and = and i32 %0, -256
301  %shr = lshr i32 %x, 16
302  %and1 = and i32 %shr, 255
303  %or = or i32 %and, %and1
; Note: despite its name, %shl is produced by a logical shift right.
304  %shl = lshr i32 %or, 4
305  %and2 = and i32 %shl, -8
306  %shr1 = lshr i32 %x1, 5
307  %and3 = and i32 %shr1, 7
308  %or1 = or i32 %and2, %and3
309  %shl1 = shl i32 %or1, 2
310  store i32 %shl1, i32* %y, align 8
311  ret void
312}
313
314; Check if we can still catch bfm instruction when we drop some high bits
315; and some low bits
316; (i64 version)
; Mirrors @fct14 on i64: an 8-bit field from bits [16, 24) of %x is inserted,
; the result is shifted right by 4, a 3-bit field from bits [5, 8) of %x1 is
; inserted, and the final value is shifted left by 2 before the store.
317define void @fct15(i64* nocapture %y, i64 %x, i64 %x1) nounwind optsize inlinehint ssp {
318entry:
319; CHECK-LABEL: fct15:
320; CHECK: ldr [[REG1:x[0-9]+]],
321; CHECK-NEXT: bfxil [[REG1]], x1, #16, #8
322; lsr is an alias of ubfm
323; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #4
324; CHECK-NEXT: bfxil [[REG2]], x2, #5, #3
325; lsl is an alias of ubfm
326; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2
327; CHECK-NEXT: str [[REG3]],
328; CHECK-NEXT: ret
329  %0 = load i64, i64* %y, align 8
330  %and = and i64 %0, -256
331  %shr = lshr i64 %x, 16
332  %and1 = and i64 %shr, 255
333  %or = or i64 %and, %and1
; Note: despite its name, %shl is produced by a logical shift right.
334  %shl = lshr i64 %or, 4
335  %and2 = and i64 %shl, -8
336  %shr1 = lshr i64 %x1, 5
337  %and3 = and i64 %shr1, 7
338  %or1 = or i64 %and2, %and3
339  %shl1 = shl i64 %or1, 2
340  store i64 %shl1, i64* %y, align 8
341  ret void
342}
343
344; Check if we can still catch bfm instruction when we drop some high bits
345; and some low bits and a masking operation has to be kept
; Unlike @fct12, the loaded value is masked with 1737056 (0x1a8160) rather
; than a mask that only clears the inserted field, so the expected output
; still contains an explicit "and" with that constant (built with movz/movk)
; ahead of the bfxil.
346define void @fct16(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
347entry:
348; CHECK-LABEL: fct16:
349; CHECK: ldr [[REG1:w[0-9]+]],
350; Create the constant
351; CHECK: movz [[REGCST:w[0-9]+]], #0x1a, lsl #16
352; CHECK: movk [[REGCST]], #0x8160
353; Do the masking
354; CHECK: and [[REG2:w[0-9]+]], [[REG1]], [[REGCST]]
355; CHECK-NEXT: bfxil [[REG2]], w1, #16, #3
356; ubfx is an alias of ubfm
357; CHECK-NEXT: ubfx [[REG3:w[0-9]+]], [[REG2]], #2, #28
358; CHECK-NEXT: str [[REG3]],
359; CHECK-NEXT: ret
360  %0 = load i32, i32* %y, align 8
361  %and = and i32 %0, 1737056
362  %shr = lshr i32 %x, 16
363  %and1 = and i32 %shr, 7
364  %or = or i32 %and, %and1
365  %shl = shl i32 %or, 2
366  %shr2 = lshr i32 %shl, 4
367  store i32 %shr2, i32* %y, align 8
368  ret void
369}
370
371
372; Check if we can still catch bfm instruction when we drop some high bits
373; and some low bits and a masking operation has to be kept
374; (i64 version)
; Mirrors @fct16 on i64 with the same mask value 1737056 (0x1a8160). The
; expected output builds the constant through the w view of a register and
; uses the x view of that same register number in the "and".
375define void @fct17(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
376entry:
377; CHECK-LABEL: fct17:
378; CHECK: ldr [[REG1:x[0-9]+]],
379; Create the constant
380; CHECK: movz w[[REGCST:[0-9]+]], #0x1a, lsl #16
381; CHECK: movk w[[REGCST]], #0x8160
382; Do the masking
383; CHECK: and [[REG2:x[0-9]+]], [[REG1]], x[[REGCST]]
384; CHECK-NEXT: bfxil [[REG2]], x1, #16, #3
385; ubfx is an alias of ubfm
386; CHECK-NEXT: ubfx [[REG3:x[0-9]+]], [[REG2]], #2, #60
387; CHECK-NEXT: str [[REG3]],
388; CHECK-NEXT: ret
389  %0 = load i64, i64* %y, align 8
390  %and = and i64 %0, 1737056
391  %shr = lshr i64 %x, 16
392  %and1 = and i64 %shr, 7
393  %or = or i64 %and, %and1
394  %shl = shl i64 %or, 2
395  %shr2 = lshr i64 %shl, 4
396  store i64 %shr2, i64* %y, align 8
397  ret void
398}
399
; Extracts an 8-bit field at bit 9 of an i32 argument, with a zext to i64
; sitting between the shift and the mask. Expected to be a single 64-bit ubfx
; with lsb 9 and width 8.
400define i64 @fct18(i32 %xor72) nounwind ssp {
401; CHECK-LABEL: fct18:
402; CHECK: ubfx x0, x0, #9, #8
403  %shr81 = lshr i32 %xor72, 9
404  %conv82 = zext i32 %shr81 to i64
405  %result = and i64 %conv82, 255
406  ret i64 %result
407}
408
409; Using the access to the global array to keep the instruction and control flow.
410@first_ones = external global [65536 x i8]
411
412; Function Attrs: nounwind readonly ssp
; All three shifts of %arg1 (by 16, 32 and 48) are computed in the entry block,
; while their truncated or masked users live in later blocks. The opt run is
; expected to show an lshr inside each of the labelled blocks below, and the
; llc run is expected to select ubfx for the 16-bit extractions at bits 32
; and 16.
413define i32 @fct19(i64 %arg1) nounwind readonly ssp  {
414; CHECK-LABEL: fct19:
415entry:
416  %x.sroa.1.0.extract.shift = lshr i64 %arg1, 16
417  %x.sroa.1.0.extract.trunc = trunc i64 %x.sroa.1.0.extract.shift to i16
418  %x.sroa.3.0.extract.shift = lshr i64 %arg1, 32
419  %x.sroa.5.0.extract.shift = lshr i64 %arg1, 48
; Branch on whether the top 16 bits of %arg1 are all zero.
420  %tobool = icmp eq i64 %x.sroa.5.0.extract.shift, 0
421  br i1 %tobool, label %if.end, label %if.then
422
423if.then:                                          ; preds = %entry
; Index @first_ones with the top 16 bits of %arg1.
424  %arrayidx3 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift
425  %0 = load i8, i8* %arrayidx3, align 1
426  %conv = zext i8 %0 to i32
427  br label %return
428
429; OPT-LABEL: if.end
430if.end:                                           ; preds = %entry
431; OPT: lshr
432; CHECK: ubfx	[[REG1:x[0-9]+]], [[REG2:x[0-9]+]], #32, #16
433  %x.sroa.3.0.extract.trunc = trunc i64 %x.sroa.3.0.extract.shift to i16
434  %tobool6 = icmp eq i16 %x.sroa.3.0.extract.trunc, 0
435; CHECK: cbz
436  br i1 %tobool6, label %if.end13, label %if.then7
437
438; OPT-LABEL: if.then7
439if.then7:                                         ; preds = %if.end
440; OPT: lshr
441; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
442; So neither of them should be in the assembly code.
443; CHECK-NOT: and
444; CHECK-NOT: ubfm
445  %idxprom10 = and i64 %x.sroa.3.0.extract.shift, 65535
446  %arrayidx11 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom10
447  %1 = load i8, i8* %arrayidx11, align 1
448  %conv12 = zext i8 %1 to i32
449  %add = add nsw i32 %conv12, 16
450  br label %return
451
452; OPT-LABEL: if.end13
453if.end13:                                         ; preds = %if.end
454; OPT: lshr
455; OPT: trunc
456; CHECK: ubfx	[[REG3:x[0-9]+]], [[REG4:x[0-9]+]], #16, #16
457  %tobool16 = icmp eq i16 %x.sroa.1.0.extract.trunc, 0
458; CHECK: cbz
459  br i1 %tobool16, label %return, label %if.then17
460
461; OPT-LABEL: if.then17
462if.then17:                                        ; preds = %if.end13
463; OPT: lshr
464; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
465; So neither of them should be in the assembly code.
466; CHECK-NOT: and
467; CHECK-NOT: ubfm
468  %idxprom20 = and i64 %x.sroa.1.0.extract.shift, 65535
469  %arrayidx21 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom20
470  %2 = load i8, i8* %arrayidx21, align 1
471  %conv22 = zext i8 %2 to i32
472  %add23 = add nsw i32 %conv22, 32
473  br label %return
474
475return:                                           ; preds = %if.end13, %if.then17, %if.then7, %if.then
476; CHECK: ret
; Merge the per-path results; the path arriving directly from if.end13
; contributes the constant 64.
477  %retval.0 = phi i32 [ %conv, %if.then ], [ %add, %if.then7 ], [ %add23, %if.then17 ], [ 64, %if.end13 ]
478  ret i32 %retval.0
479}
480
481; Make sure we do not assert if the immediate in and is bigger than i64.
482; PR19503.
483; OPT-LABEL: @fct20
484; OPT: lshr
485; OPT-NOT: lshr
486; OPT: ret
487; CHECK-LABEL: fct20:
488; CHECK: ret
; The shift is defined in the entry block and its masked user sits in %then.
; The opt checks above require exactly one lshr between the function label
; and the ret in the opt output.
489define i80 @fct20(i128 %a, i128 %b) {
490entry:
491  %shr = lshr i128 %a, 18
492  %conv = trunc i128 %shr to i80
493  %tobool = icmp eq i128 %b, 0
494  br i1 %tobool, label %then, label %end
495then:
; This i128 mask constant does not fit in 64 bits.
496  %and = and i128 %shr, 483673642326615442599424
497  %conv2 = trunc i128 %and to i80
498  br label %end
499end:
500  %conv3 = phi i80 [%conv, %entry], [%conv2, %then]
501  ret i80 %conv3
502}
503
504; Check if we can still catch UBFX when "AND" is used by SHL.
505; CHECK-LABEL: fct21:
506; CHECK: ubfx
507@arr = external global [8 x [64 x i64]]
; Extracts the 4-bit field at bits [4, 8) of %x (lshr 4, mask 15) and uses it
; as the innermost index into @arr to load an i64.
508define i64 @fct21(i64 %x) {
509entry:
510  %shr = lshr i64 %x, 4
511  %and = and i64 %shr, 15
512  %arrayidx = getelementptr inbounds [8 x [64 x i64]], [8 x [64 x i64]]* @arr, i64 0, i64 0, i64 %and
513  %0 = load i64, i64* %arrayidx, align 8
514  ret i64 %0
515}
516
; Two chained insertions whose final result is truncated to i16. First, bits
; [3, 7) of (%in << 3) (mask 120) are merged with the low 3 bits of %dst
; (mask 7). Then that value is or-ed with a copy of itself shifted left by 8.
; The expected output has a bitfield-move instruction for the first step and
; a bfi with lsb 8, width 7 for the second.
517define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
518; CHECK-LABEL: test_ignored_rightbits:
519
520  %positioned_field = shl i32 %in, 3
521  %positioned_masked_field = and i32 %positioned_field, 120
522  %masked_dst = and i32 %dst, 7
523  %insertion = or i32 %masked_dst, %positioned_masked_field
524; CHECK: {{bfm|bfi|bfxil}}
525
526  %shl16 = shl i32 %insertion, 8
527  %or18 = or i32 %shl16, %insertion
528  %conv19 = trunc i32 %or18 to i16
529; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #8, #7
530
531  ret i16 %conv19
532}
533