1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
3
4# CHECK-NOT: LETP
5
6--- |
7  define arm_aapcs_vfpcc void @test_ctlz_i8(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c, i32 %elts, i32 %iters) #0 {
  ; IR counterpart of the test_ctlz_i8 machine function. Each iteration loads a
  ; predicated <8 x i16> vector from %a and from %b, takes a byte-wise ctlz of the
  ; %a data, combines it with the %b data through the vshrn intrinsic and stores
  ; the result to %c. The MIR memory operands refer to %addr.a, %addr.b and
  ; %addr.c by name, so these value names must stay as they are.
8  entry:
    ; Bypass the loop when %elts is less than 1.
9    %cmp = icmp slt i32 %elts, 1
10    br i1 %cmp, label %exit, label %loop.ph
11
12  loop.ph:                                          ; preds = %entry
    ; %iters seeds the loop-iteration counter.
13    %start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
14    br label %loop.body
15
16  loop.body:                                        ; preds = %loop.body, %loop.ph
17    %lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
18    %count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
19    %addr.a = phi <8 x i16>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
20    %addr.b = phi <8 x i16>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
21    %addr.c = phi <8 x i16>* [ %c, %loop.ph ], [ %addr.c.next, %loop.body ]
    ; %pred is computed from the remaining element count and masks both loads
    ; and the store; the count drops by 8 per iteration.
22    %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %count)
23    %elts.rem = sub i32 %count, 8
24    %masked.load.a = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %addr.a, i32 2, <8 x i1> %pred, <8 x i16> undef)
25    %masked.load.b = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %addr.b, i32 2, <8 x i1> %pred, <8 x i16> undef)
    ; The ctlz works on the <16 x i8> view of the %a data and takes no predicate.
26    %bitcast.a = bitcast <8 x i16> %masked.load.a to <16 x i8>
27    %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %bitcast.a, i1 false)
    ; The ctlz result is the first vshrn operand, the %b data the second.
28    %shrn = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %ctlz, <8 x i16> %masked.load.b, i32 1, i32 1, i32 0, i32 1, i32 0, i32 1)
29    %bitcast = bitcast <16 x i8> %shrn to <8 x i16>
30    call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %bitcast, <8 x i16>* %addr.c, i32 2, <8 x i1> %pred)
    ; NOTE(review): %addr.a.next is derived from %addr.b rather than %addr.a. The
    ; machine function mirrors this ($r0 = tMOVr $r1 in bb.2.loop.body), so the two
    ; agree with each other; confirm whether it is intentional before changing it.
31    %addr.a.next = getelementptr <8 x i16>, <8 x i16>* %addr.b, i32 1
32    %addr.b.next = getelementptr <8 x i16>, <8 x i16>* %addr.b, i32 1
33    %addr.c.next = getelementptr <8 x i16>, <8 x i16>* %addr.c, i32 1
    ; Decrement the loop counter and keep iterating while it is non-zero.
34    %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1)
35    %end = icmp ne i32 %loop.dec, 0
36    %lsr.iv.next = add i32 %lsr.iv, -1
37    br i1 %end, label %loop.body, label %exit
38
39  exit:                                             ; preds = %loop.body, %entry
40    ret void
41  }
42
43  define arm_aapcs_vfpcc void @test_ctlz_i16(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c, i32 %elts, i32 %iters) #0 {
  ; IR counterpart of the test_ctlz_i16 machine function. Each iteration loads a
  ; predicated <4 x i32> vector from %a and from %b, takes a halfword-wise ctlz of
  ; the %a data, combines it with the %b data through the vshrn intrinsic and
  ; stores the result to %c. The MIR memory operands refer to %addr.a, %addr.b
  ; and %addr.c by name, so these value names must stay as they are.
44  entry:
    ; Bypass the loop when %elts is less than 1.
45    %cmp = icmp slt i32 %elts, 1
46    br i1 %cmp, label %exit, label %loop.ph
47
48  loop.ph:                                          ; preds = %entry
    ; %iters seeds the loop-iteration counter.
49    %start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
50    br label %loop.body
51
52  loop.body:                                        ; preds = %loop.body, %loop.ph
53    %lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
54    %count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
55    %addr.a = phi <4 x i32>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
56    %addr.b = phi <4 x i32>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
57    %addr.c = phi <4 x i32>* [ %c, %loop.ph ], [ %addr.c.next, %loop.body ]
    ; %pred is computed from the remaining element count and masks both loads
    ; and the store; the count drops by 4 per iteration.
58    %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %count)
59    %elts.rem = sub i32 %count, 4
60    %masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr.a, i32 4, <4 x i1> %pred, <4 x i32> undef)
61    %masked.load.b = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr.b, i32 4, <4 x i1> %pred, <4 x i32> undef)
    ; The ctlz works on the <8 x i16> view of the %a data and takes no predicate.
62    %bitcast.a = bitcast <4 x i32> %masked.load.a to <8 x i16>
63    %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %bitcast.a, i1 false)
    ; The ctlz result is the first vshrn operand, the %b data the second.
64    %shrn = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %ctlz, <4 x i32> %masked.load.b, i32 3, i32 1, i32 0, i32 1, i32 0, i32 1)
65    %bitcast = bitcast <8 x i16> %shrn to <4 x i32>
66    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %bitcast, <4 x i32>* %addr.c, i32 4, <4 x i1> %pred)
    ; Advance all three pointers by one vector.
67    %addr.a.next = getelementptr <4 x i32>, <4 x i32>* %addr.a, i32 1
68    %addr.b.next = getelementptr <4 x i32>, <4 x i32>* %addr.b, i32 1
69    %addr.c.next = getelementptr <4 x i32>, <4 x i32>* %addr.c, i32 1
    ; Decrement the loop counter and keep iterating while it is non-zero.
70    %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1)
71    %end = icmp ne i32 %loop.dec, 0
72    %lsr.iv.next = add i32 %lsr.iv, -1
73    br i1 %end, label %loop.body, label %exit
74
75  exit:                                             ; preds = %loop.body, %entry
76    ret void
77  }
78
79  define arm_aapcs_vfpcc void @test_ctlz_i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c, i32 %elts, i32 %iters) #0 {
  ; IR counterpart of the test_ctlz_i32 machine function. Unlike the other two
  ; functions in this file, the ctlz here is applied to the %b data at full
  ; 32-bit lane width and its result is the second vshrn operand; the %a data
  ; (viewed as <8 x i16>) is the first. The MIR memory operands refer to %addr.a,
  ; %addr.b and %addr.c by name, so these value names must stay as they are.
80  entry:
    ; Bypass the loop when %elts is less than 1.
81    %cmp = icmp slt i32 %elts, 1
82    br i1 %cmp, label %exit, label %loop.ph
83
84  loop.ph:                                          ; preds = %entry
    ; %iters seeds the loop-iteration counter.
85    %start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
86    br label %loop.body
87
88  loop.body:                                        ; preds = %loop.body, %loop.ph
89    %lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
90    %count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
91    %addr.a = phi <4 x i32>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
92    %addr.b = phi <4 x i32>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
93    %addr.c = phi <4 x i32>* [ %c, %loop.ph ], [ %addr.c.next, %loop.body ]
    ; %pred is computed from the remaining element count and masks both loads
    ; and the store; the count drops by 4 per iteration.
94    %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %count)
95    %elts.rem = sub i32 %count, 4
96    %masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr.a, i32 4, <4 x i1> %pred, <4 x i32> undef)
97    %masked.load.b = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr.b, i32 4, <4 x i1> %pred, <4 x i32> undef)
    ; The ctlz consumes the %b data directly and takes no predicate.
98    %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %masked.load.b, i1 false)
99    %bitcast.a = bitcast <4 x i32> %masked.load.a to <8 x i16>
100    %shrn = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %bitcast.a, <4 x i32> %ctlz, i32 3, i32 1, i32 0, i32 1, i32 0, i32 1)
101    %bitcast = bitcast <8 x i16> %shrn to <4 x i32>
102    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %bitcast, <4 x i32>* %addr.c, i32 4, <4 x i1> %pred)
    ; Advance all three pointers by one vector.
103    %addr.a.next = getelementptr <4 x i32>, <4 x i32>* %addr.a, i32 1
104    %addr.b.next = getelementptr <4 x i32>, <4 x i32>* %addr.b, i32 1
105    %addr.c.next = getelementptr <4 x i32>, <4 x i32>* %addr.c, i32 1
    ; Decrement the loop counter and keep iterating while it is non-zero.
106    %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1)
107    %end = icmp ne i32 %loop.dec, 0
108    %lsr.iv.next = add i32 %lsr.iv, -1
109    br i1 %end, label %loop.body, label %exit
110
111  exit:                                             ; preds = %loop.body, %entry
112    ret void
113  }
114
115  declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1 immarg)
116  declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1 immarg)
117  declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1 immarg)
  ; The three ctlz overloads above are the operations under test: v4i32 is used
  ; by test_ctlz_i32, v8i16 by test_ctlz_i16 and v16i8 by test_ctlz_i8.
  ; Loop-counter intrinsics shared by all three functions.
118  declare i32 @llvm.start.loop.iterations.i32(i32)
119  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
  ; Predicate, masked load/store and vshrn overloads used by test_ctlz_i16 and
  ; test_ctlz_i32.
120  declare <4 x i1> @llvm.arm.mve.vctp32(i32)
121  declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
122  declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
123  declare <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16>, <4 x i32>, i32, i32, i32, i32, i32, i32)
  ; Predicate, masked load/store and vshrn overloads used by test_ctlz_i8.
124  declare <8 x i1> @llvm.arm.mve.vctp16(i32)
125  declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
126  declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
127  declare <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8>, <8 x i16>, i32, i32, i32, i32, i32, i32)
128
129...
130---
# Machine function test_ctlz_i8: input to the arm-low-overhead-loops pass named
# on the RUN line, followed by the autogenerated expected output in the body.
131name:            test_ctlz_i8
132alignment:       2
133tracksRegLiveness: true
134registers:       []
135liveins:
136  - { reg: '$r0', virtual-reg: '' }
137  - { reg: '$r1', virtual-reg: '' }
138  - { reg: '$r2', virtual-reg: '' }
139  - { reg: '$r3', virtual-reg: '' }
140frameInfo:
141  stackSize:       8
142  offsetAdjustment: 0
143  maxAlignment:    4
144fixedStack:
# Incoming stack slot; bb.1.loop.ph loads the loop trip count from it.
145  - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default,
146      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
147      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
148stack:
# Save slots for the $lr / $r4 pair pushed in bb.0.entry.
149  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
150      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
151      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
152  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
153      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
154      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
155callSites:       []
156constants:       []
157machineFunctionInfo: {}
158body:             |
  ; Expected output (autogenerated, do not edit by hand). Compared with the
  ; input below, t2DoLoopStart becomes t2DLS, t2LoopDec disappears, and the
  ; t2LoopEnd / tB pair becomes a single t2LEUpdate. The VCTP and VPST
  ; instructions are still present, i.e. the predication stays explicit.
159  ; CHECK-LABEL: name: test_ctlz_i8
160  ; CHECK: bb.0.entry:
161  ; CHECK:   successors: %bb.1(0x80000000)
162  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r3, $r4
163  ; CHECK:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
164  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
165  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
166  ; CHECK:   frame-setup CFI_INSTRUCTION offset $r4, -8
167  ; CHECK:   tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
168  ; CHECK:   t2IT 11, 8, implicit-def $itstate
169  ; CHECK:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
170  ; CHECK: bb.1.loop.ph:
171  ; CHECK:   successors: %bb.2(0x80000000)
172  ; CHECK:   liveins: $r0, $r1, $r2, $r3
173  ; CHECK:   renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
174  ; CHECK:   $lr = t2DLS killed renamable $lr
175  ; CHECK:   $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg
176  ; CHECK: bb.2.loop.body:
177  ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
178  ; CHECK:   liveins: $r0, $r1, $r2, $r3, $r4
179  ; CHECK:   $lr = tMOVr $r4, 14 /* CC::al */, $noreg
180  ; CHECK:   renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg
181  ; CHECK:   MVE_VPST 4, implicit $vpr
182  ; CHECK:   renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 2)
183  ; CHECK:   renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 2)
184  ; CHECK:   renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
185  ; CHECK:   renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
186  ; CHECK:   renamable $q1 = MVE_VCLZs8 killed renamable $q1, 0, $noreg, undef renamable $q1
187  ; CHECK:   $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
188  ; CHECK:   renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg
189  ; CHECK:   MVE_VPST 8, implicit $vpr
190  ; CHECK:   renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2)
191  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.2
192  ; CHECK: bb.3.exit:
193  ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
  ; Input to the pass starts here.
194  bb.0.entry:
195    successors: %bb.1(0x80000000)
196    liveins: $r0, $r1, $r2, $r3, $r4, $lr
197
    ; Save $r4 and $lr, then return immediately (predicated pop) when $r3 < 1.
198    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
199    frame-setup CFI_INSTRUCTION def_cfa_offset 8
200    frame-setup CFI_INSTRUCTION offset $lr, -4
201    frame-setup CFI_INSTRUCTION offset $r4, -8
202    tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
203    t2IT 11, 8, implicit-def $itstate
204    frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
205
206  bb.1.loop.ph:
207    successors: %bb.2(0x80000000)
208    liveins: $r0, $r1, $r2, $r3
209
    ; Load the trip count from the incoming stack slot, start the loop with it
    ; and park the counter in $r4.
210    renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
211    renamable $lr = t2DoLoopStart killed renamable $lr
212    $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg
213
214  bb.2.loop.body:
215    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
216    liveins: $r0, $r1, $r2, $r3, $r4
217
    ; $r3 is the remaining element count fed to VCTP; $r4 is the counter copy
    ; that is moved into $lr at the top of every iteration.
218    $lr = tMOVr $r4, 14 /* CC::al */, $noreg
219    renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg
220    MVE_VPST 4, implicit $vpr
221    renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 2)
222    renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 2)
223    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
224    renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
    ; VCLZ and VQSHRUN carry no $vpr operand (0, $noreg), unlike the loads and
    ; the store, which are predicated on $vpr.
225    renamable $q1 = MVE_VCLZs8 killed renamable $q1, 0, $noreg, undef renamable $q1
226    renamable $lr = t2LoopDec killed renamable $lr, 1
    ; The next %addr.a pointer is a copy of the already advanced %addr.b
    ; pointer, matching %addr.a.next in the IR above.
227    $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
228    renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg
229    MVE_VPST 8, implicit $vpr
230    renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2)
231    t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
232    tB %bb.3, 14 /* CC::al */, $noreg
233
234  bb.3.exit:
235    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
236
237...
238---
# Machine function test_ctlz_i16: input to the arm-low-overhead-loops pass named
# on the RUN line, followed by the autogenerated expected output in the body.
239name:            test_ctlz_i16
240alignment:       2
241tracksRegLiveness: true
242registers:       []
243liveins:
244  - { reg: '$r0', virtual-reg: '' }
245  - { reg: '$r1', virtual-reg: '' }
246  - { reg: '$r2', virtual-reg: '' }
247  - { reg: '$r3', virtual-reg: '' }
248frameInfo:
249  stackSize:       8
250  offsetAdjustment: 0
251  maxAlignment:    4
252fixedStack:
# Incoming stack slot; bb.1.loop.ph loads the loop trip count from it.
253  - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default,
254      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
255      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
256stack:
# NOTE(review): stack object 1 names $r4 as the callee-saved register, while the
# body pushes and pops $r7 and its CFI describes $r7. The expected liveins below
# list $r4, which presumably follows from this entry; confirm it is intentional
# before changing it, and regenerate the assertions if it is changed.
257  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
258      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
259      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
260  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
261      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
262      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
263callSites:       []
264constants:       []
265machineFunctionInfo: {}
266body:             |
  ; Expected output (autogenerated, do not edit by hand). Compared with the
  ; input below, t2DoLoopStart becomes t2DLS, t2LoopDec disappears, and the
  ; t2LoopEnd / tB pair becomes a single t2LEUpdate. The VCTP and VPST
  ; instructions are still present, i.e. the predication stays explicit.
267  ; CHECK-LABEL: name: test_ctlz_i16
268  ; CHECK: bb.0.entry:
269  ; CHECK:   successors: %bb.1(0x80000000)
270  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7
271  ; CHECK:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
272  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
273  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
274  ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
275  ; CHECK:   tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
276  ; CHECK:   t2IT 11, 8, implicit-def $itstate
277  ; CHECK:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def dead $r7, def $pc, implicit killed $itstate
278  ; CHECK: bb.1.loop.ph:
279  ; CHECK:   successors: %bb.2(0x80000000)
280  ; CHECK:   liveins: $r0, $r1, $r2, $r3, $r4
281  ; CHECK:   renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
282  ; CHECK:   $lr = t2DLS killed renamable $lr
283  ; CHECK:   $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
284  ; CHECK: bb.2.loop.body:
285  ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
286  ; CHECK:   liveins: $r0, $r1, $r2, $r3, $r4, $r12
287  ; CHECK:   $lr = tMOVr $r12, 14 /* CC::al */, $noreg
288  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
289  ; CHECK:   MVE_VPST 4, implicit $vpr
290  ; CHECK:   renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4)
291  ; CHECK:   renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4)
292  ; CHECK:   renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
293  ; CHECK:   renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
294  ; CHECK:   renamable $q1 = MVE_VCLZs16 killed renamable $q1, 0, $noreg, undef renamable $q1
295  ; CHECK:   renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg
296  ; CHECK:   MVE_VPST 8, implicit $vpr
297  ; CHECK:   renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
298  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.2
299  ; CHECK: bb.3.exit:
300  ; CHECK:   liveins: $r4
301  ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def dead $r7, def $pc
  ; Input to the pass starts here.
302  bb.0.entry:
303    successors: %bb.1(0x80000000)
304    liveins: $r0, $r1, $r2, $r3, $r7, $lr
305
    ; Save $r7 and $lr, then return immediately (predicated pop) when $r3 < 1.
306    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
307    frame-setup CFI_INSTRUCTION def_cfa_offset 8
308    frame-setup CFI_INSTRUCTION offset $lr, -4
309    frame-setup CFI_INSTRUCTION offset $r7, -8
310    tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
311    t2IT 11, 8, implicit-def $itstate
312    frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
313
314  bb.1.loop.ph:
315    successors: %bb.2(0x80000000)
316    liveins: $r0, $r1, $r2, $r3
317
    ; Load the trip count from the incoming stack slot, start the loop with it
    ; and park the counter in $r12.
318    renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
319    renamable $lr = t2DoLoopStart killed renamable $lr
320    $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
321
322  bb.2.loop.body:
323    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
324    liveins: $r0, $r1, $r2, $r3, $r12
325
    ; $r3 is the remaining element count fed to VCTP; $r12 is the counter copy
    ; that is moved into $lr at the top of every iteration.
326    $lr = tMOVr $r12, 14 /* CC::al */, $noreg
327    renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
328    MVE_VPST 4, implicit $vpr
329    renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4)
330    renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4)
331    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
332    renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
    ; VCLZ and VQSHRUN carry no $vpr operand (0, $noreg), unlike the loads and
    ; the store, which are predicated on $vpr.
333    renamable $q1 = MVE_VCLZs16 killed renamable $q1, 0, $noreg, undef renamable $q1
334    renamable $lr = t2LoopDec killed renamable $lr, 1
335    renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg
336    MVE_VPST 8, implicit $vpr
337    renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
338    t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
339    tB %bb.3, 14 /* CC::al */, $noreg
340
341  bb.3.exit:
342    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
343
344...
345---
# Machine function test_ctlz_i32: input to the arm-low-overhead-loops pass named
# on the RUN line, followed by the autogenerated expected output in the body.
346name:            test_ctlz_i32
347alignment:       2
348tracksRegLiveness: true
349registers:       []
350liveins:
351  - { reg: '$r0', virtual-reg: '' }
352  - { reg: '$r1', virtual-reg: '' }
353  - { reg: '$r2', virtual-reg: '' }
354  - { reg: '$r3', virtual-reg: '' }
355frameInfo:
356  stackSize:       8
357  offsetAdjustment: 0
358  maxAlignment:    4
359fixedStack:
# Incoming stack slot; bb.1.loop.ph loads the loop trip count from it.
360  - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default,
361      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
362      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
363stack:
# NOTE(review): stack object 1 names $r4 as the callee-saved register, while the
# body pushes and pops $r7 and its CFI describes $r7. The expected liveins below
# list $r4, which presumably follows from this entry; confirm it is intentional
# before changing it, and regenerate the assertions if it is changed.
364  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
365      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
366      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
367  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
368      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
369      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
370callSites:       []
371constants:       []
372machineFunctionInfo: {}
373body:             |
  ; Expected output (autogenerated, do not edit by hand). Compared with the
  ; input below, t2DoLoopStart becomes t2DLS, t2LoopDec disappears, and the
  ; t2LoopEnd / tB pair becomes a single t2LEUpdate. The VCTP and VPST
  ; instructions are still present, i.e. the predication stays explicit.
374  ; CHECK-LABEL: name: test_ctlz_i32
375  ; CHECK: bb.0.entry:
376  ; CHECK:   successors: %bb.1(0x80000000)
377  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7
378  ; CHECK:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
379  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
380  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
381  ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
382  ; CHECK:   tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
383  ; CHECK:   t2IT 11, 8, implicit-def $itstate
384  ; CHECK:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def dead $r7, def $pc, implicit killed $itstate
385  ; CHECK: bb.1.loop.ph:
386  ; CHECK:   successors: %bb.2(0x80000000)
387  ; CHECK:   liveins: $r0, $r1, $r2, $r3, $r4
388  ; CHECK:   renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
389  ; CHECK:   $lr = t2DLS killed renamable $lr
390  ; CHECK:   $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
391  ; CHECK: bb.2.loop.body:
392  ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
393  ; CHECK:   liveins: $r0, $r1, $r2, $r3, $r4, $r12
394  ; CHECK:   $lr = tMOVr $r12, 14 /* CC::al */, $noreg
395  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
396  ; CHECK:   MVE_VPST 4, implicit $vpr
397  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4)
398  ; CHECK:   renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4)
399  ; CHECK:   renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
400  ; CHECK:   renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
401  ; CHECK:   renamable $q1 = MVE_VCLZs32 killed renamable $q1, 0, $noreg, undef renamable $q1
402  ; CHECK:   renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg
403  ; CHECK:   MVE_VPST 8, implicit $vpr
404  ; CHECK:   renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
405  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.2
406  ; CHECK: bb.3.exit:
407  ; CHECK:   liveins: $r4
408  ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def dead $r7, def $pc
  ; Input to the pass starts here.
409  bb.0.entry:
410    successors: %bb.1(0x80000000)
411    liveins: $r0, $r1, $r2, $r3, $r7, $lr
412
    ; Save $r7 and $lr, then return immediately (predicated pop) when $r3 < 1.
413    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
414    frame-setup CFI_INSTRUCTION def_cfa_offset 8
415    frame-setup CFI_INSTRUCTION offset $lr, -4
416    frame-setup CFI_INSTRUCTION offset $r7, -8
417    tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
418    t2IT 11, 8, implicit-def $itstate
419    frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
420
421  bb.1.loop.ph:
422    successors: %bb.2(0x80000000)
423    liveins: $r0, $r1, $r2, $r3
424
    ; Load the trip count from the incoming stack slot, start the loop with it
    ; and park the counter in $r12.
425    renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
426    renamable $lr = t2DoLoopStart killed renamable $lr
427    $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
428
429  bb.2.loop.body:
430    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
431    liveins: $r0, $r1, $r2, $r3, $r12
432
    ; $r3 is the remaining element count fed to VCTP; $r12 is the counter copy
    ; that is moved into $lr at the top of every iteration.
433    $lr = tMOVr $r12, 14 /* CC::al */, $noreg
434    renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
435    MVE_VPST 4, implicit $vpr
436    renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4)
437    renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4)
438    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
439    renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
    ; Here VCLZ is applied to $q1, the data loaded from %addr.b, and its result
    ; is the second VQSHRUN source; $q0 (loaded from %addr.a) is the first.
    ; Neither instruction carries a $vpr operand (0, $noreg).
440    renamable $q1 = MVE_VCLZs32 killed renamable $q1, 0, $noreg, undef renamable $q1
441    renamable $lr = t2LoopDec killed renamable $lr, 1
442    renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg
443    MVE_VPST 8, implicit $vpr
444    renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
445    t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
446    tB %bb.3, 14 /* CC::al */, $noreg
447
448  bb.3.exit:
449    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
450
451...
452