1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
3
4--- |
5  define dso_local arm_aapcs_vfpcc void @test_wlstp8(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i32 %N) {
    ; IR companion of the test_wlstp8 machine function: a masked i8 loop that
    ; stores b[index..] * c[index..] into a[index..], 16 lanes per iteration.
    ; The entry block derives the iteration count %n.vec from %N and hands it to
    ; @llvm.test.set.loop.iterations; the i1 result branches straight to
    ; for.cond.cleanup when true, otherwise the loop is entered.
6  entry:
7    %0 = add i32 %N, 15
8    %1 = lshr i32 %0, 4
9    %2 = shl nuw i32 %1, 4
10    %3 = add i32 %2, -16
11    %4 = lshr i32 %3, 4
12    %n.vec = add nuw nsw i32 %4, 1
13    %cmp = call i1 @llvm.test.set.loop.iterations.i32(i32 %n.vec)
14    br i1 %cmp, label %for.cond.cleanup, label %vector.ph
15
16  vector.ph:                                        ; preds = %entry
17    br label %vector.body
18
19  vector.body:                                      ; preds = %vector.body, %vector.ph
    ; %index is the byte offset applied to all three pointers (+16 per trip),
    ; %count is the iteration counter stepped by @llvm.loop.decrement.reg, and
    ; %5 is the element count fed to vctp8 (starts at %N, -16 per trip).
20    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
21    %count = phi i32 [ %n.vec, %vector.ph ], [ %loop.dec, %vector.body ]
22    %5 = phi i32 [ %N, %vector.ph ], [ %7, %vector.body ]
    ; %6 is the <16 x i1> mask used by both masked loads and the masked store.
23    %6 = call <16 x i1> @llvm.arm.vctp8(i32 %5)
24    %7 = sub i32 %5, 16
25    %scevgep4 = getelementptr i8, i8* %b, i32 %index
26    %scevgep45 = bitcast i8* %scevgep4 to <16 x i8>*
27    %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %scevgep45, i32 1, <16 x i1> %6, <16 x i8> undef)
28    %scevgep2 = getelementptr i8, i8* %c, i32 %index
29    %scevgep23 = bitcast i8* %scevgep2 to <16 x i8>*
30    %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %scevgep23, i32 1, <16 x i1> %6, <16 x i8> undef)
31    %tmp5 = mul <16 x i8> %wide.masked.load14, %wide.masked.load
32    %scevgep = getelementptr i8, i8* %a, i32 %index
33    %scevgep1 = bitcast i8* %scevgep to <16 x i8>*
34    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %tmp5, <16 x i8>* %scevgep1, i32 1, <16 x i1> %6)
35    %index.next = add i32 %index, 16
    ; The loop exits once the decremented counter reaches zero.
36    %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
37    %tmp8 = icmp eq i32 %loop.dec, 0
38    br i1 %tmp8, label %for.cond.cleanup, label %vector.body
39
40  for.cond.cleanup:                                 ; preds = %vector.body, %entry
41    ret void
42  }
43
44  define dso_local arm_aapcs_vfpcc void @test_wlstp16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c, i32 %N) {
    ; IR companion of the test_wlstp16 machine function: a masked i16 loop that
    ; stores b * c into a, 8 lanes per iteration. Unlike test_wlstp8 this
    ; variant walks the three arrays with pointer induction variables
    ; (%lsr.iv*) rather than a shared index. The iteration count %n.vec is
    ; derived from %N in the entry block and passed to
    ; @llvm.test.set.loop.iterations, whose i1 result can skip the loop.
45  entry:
46    %0 = add i32 %N, 7
47    %1 = lshr i32 %0, 3
48    %2 = shl nuw i32 %1, 3
49    %3 = add i32 %2, -8
50    %4 = lshr i32 %3, 3
51    %n.vec = add nuw nsw i32 %4, 1
52    %cmp = call i1 @llvm.test.set.loop.iterations.i32(i32 %n.vec)
53    br i1 %cmp, label %for.cond.cleanup, label %vector.ph
54
55  vector.ph:                                        ; preds = %entry
56    br label %vector.body
57
58  vector.body:                                      ; preds = %vector.body, %vector.ph
    ; %lsr.iv5 / %lsr.iv2 / %lsr.iv track %b / %c / %a; each is advanced by
    ; 8 i16 elements at the bottom of the block.
59    %lsr.iv5 = phi i16* [ %scevgep6, %vector.body ], [ %b, %vector.ph ]
60    %lsr.iv2 = phi i16* [ %scevgep3, %vector.body ], [ %c, %vector.ph ]
61    %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
62    %count = phi i32 [ %n.vec, %vector.ph ], [ %loop.dec, %vector.body ]
    ; %5 is the element count fed to vctp16 (starts at %N, -8 per trip).
63    %5 = phi i32 [ %N, %vector.ph ], [ %7, %vector.body ]
64    %lsr.iv57 = bitcast i16* %lsr.iv5 to <8 x i16>*
65    %lsr.iv24 = bitcast i16* %lsr.iv2 to <8 x i16>*
66    %lsr.iv1 = bitcast i16* %lsr.iv to <8 x i16>*
    ; %6 is the <8 x i1> mask shared by both masked loads and the masked store.
67    %6 = call <8 x i1> @llvm.arm.vctp16(i32 %5)
68    %7 = sub i32 %5, 8
69    %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv57, i32 2, <8 x i1> %6, <8 x i16> undef)
70    %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv24, i32 2, <8 x i1> %6, <8 x i16> undef)
71    %tmp5 = mul <8 x i16> %wide.masked.load14, %wide.masked.load
72    call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %tmp5, <8 x i16>* %lsr.iv1, i32 2, <8 x i1> %6)
    ; The loop exits once the decremented counter reaches zero.
73    %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
74    %tmp8 = icmp eq i32 %loop.dec, 0
75    %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
76    %scevgep3 = getelementptr i16, i16* %lsr.iv2, i32 8
77    %scevgep6 = getelementptr i16, i16* %lsr.iv5, i32 8
78    br i1 %tmp8, label %for.cond.cleanup, label %vector.body
79
80  for.cond.cleanup:                                 ; preds = %vector.body, %entry
81    ret void
82  }
83
84  define dso_local i32 @test_wlstp32(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
    ; IR companion of the test_wlstp32 machine function: a masked i32
    ; multiply-accumulate reduction over %a and %b, 4 lanes per iteration.
    ; Products are accumulated into %vec.phi inside the loop; middle.block
    ; selects between the last two accumulator values and reduces the result to
    ; a scalar. When @llvm.test.set.loop.iterations returns true the loop is
    ; skipped and the function returns 0 (see the phi in for.cond.cleanup).
85  entry:
86    %0 = add i32 %N, 3
87    %1 = lshr i32 %0, 2
88    %2 = shl nuw i32 %1, 2
89    %3 = add i32 %2, -4
90    %4 = lshr i32 %3, 2
91    %n.vec = add nuw nsw i32 %4, 1
92    %cmp = call i1 @llvm.test.set.loop.iterations.i32(i32 %n.vec)
93    br i1 %cmp, label %for.cond.cleanup, label %vector.ph
94
95  vector.ph:                                        ; preds = %entry
96    br label %vector.body
97
98  vector.body:                                      ; preds = %vector.body, %vector.ph
    ; %lsr.iv2 / %lsr.iv track %a / %b (each +4 i32 elements per trip),
    ; %vec.phi is the running <4 x i32> accumulator (starts at zero), and %5 is
    ; the element count fed to vctp32 (starts at %N, -4 per trip).
99    %lsr.iv2 = phi i32* [ %scevgep3, %vector.body ], [ %a, %vector.ph ]
100    %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %b, %vector.ph ]
101    %count = phi i32 [ %n.vec, %vector.ph ], [ %loop.dec, %vector.body ]
102    %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp6, %vector.body ]
103    %5 = phi i32 [ %N, %vector.ph ], [ %7, %vector.body ]
104    %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>*
105    %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
106    %6 = call <4 x i1> @llvm.arm.vctp32(i32 %5)
107    %7 = sub i32 %5, 4
108    %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %6, <4 x i32> undef)
109    %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %6, <4 x i32> undef)
110    %tmp5 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load
111    %tmp6 = add nsw <4 x i32> %tmp5, %vec.phi
112    %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
113    %tmp7 = icmp eq i32 %loop.dec, 0
114    %scevgep = getelementptr i32, i32* %lsr.iv, i32 4
115    %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 4
116    br i1 %tmp7, label %middle.block, label %vector.body
117
118  middle.block:                                     ; preds = %vector.body
    ; The mask is recomputed from %5 (the element count as of the final trip,
    ; before the subtraction) and used to pick, per lane, between the updated
    ; accumulator %tmp6 and the previous one %vec.phi before the add-reduction.
119    %8 = call <4 x i1> @llvm.arm.vctp32(i32 %5)
120    %tmp8 = select <4 x i1> %8, <4 x i32> %tmp6, <4 x i32> %vec.phi
121    %tmp9 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp8)
122    br label %for.cond.cleanup
123
124  for.cond.cleanup:                                 ; preds = %middle.block, %entry
125    %res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp9, %middle.block ]
126    ret i32 %res.0.lcssa
127  }
128
129  declare i1 @llvm.test.set.loop.iterations.i32(i32)
  ; Intrinsic declarations for the three test functions in this module.
  ; The first two are the loop-count intrinsics: test.set.loop.iterations is
  ; called once in each entry block, loop.decrement.reg once per loop trip.
130  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
  ; Masked loads/stores; the i32 immarg is passed as 1, 2 or 4 by the callers in
  ; this module, matching the i8 / i16 / i32 element size.
131  declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
132  declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
133  declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
134  declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
135  declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
  ; NOTE(review): the v4i32 masked store below and @llvm.stackprotector further
  ; down are declared but not called by any function in this module.
136  declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
137  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
  ; vctp8/16/32 produce the 16-, 8- and 4-lane i1 masks from an i32 count.
138  declare <16 x i1> @llvm.arm.vctp8(i32)
139  declare void @llvm.stackprotector(i8*, i8**)
140  declare <8 x i1> @llvm.arm.vctp16(i32)
141  declare <4 x i1> @llvm.arm.vctp32(i32)
142...
143---
# Machine function test_wlstp8 (i8 elements, 16 per iteration).
# The input body expresses the loop with the t2WhileLoopStart / t2LoopDec /
# t2LoopEnd pseudo instructions plus an MVE_VCTP8 whose predicate guards the
# loads and the store through MVE_VPST blocks. The FileCheck expectations
# require the pass to replace that with MVE_WLSTP_8 on $r3 and MVE_LETP, and
# to drop the iteration-count computation, the VCTP/VPST instructions and the
# $r3 decrement.
144name:            test_wlstp8
145alignment:       2
146exposesReturnsTwice: false
147legalized:       false
148regBankSelected: false
149selected:        false
150failedISel:      false
151tracksRegLiveness: true
152hasWinCFI:       false
153registers:       []
154liveins:
155  - { reg: '$r0', virtual-reg: '' }
156  - { reg: '$r1', virtual-reg: '' }
157  - { reg: '$r2', virtual-reg: '' }
158  - { reg: '$r3', virtual-reg: '' }
159frameInfo:
160  isFrameAddressTaken: false
161  isReturnAddressTaken: false
162  hasStackMap:     false
163  hasPatchPoint:   false
164  stackSize:       8
165  offsetAdjustment: 0
166  maxAlignment:    4
167  adjustsStack:    false
168  hasCalls:        false
169  stackProtector:  ''
170  maxCallFrameSize: 0
171  cvBytesOfCalleeSavedRegisters: 0
172  hasOpaqueSPAdjustment: false
173  hasVAStart:      false
174  hasMustTailInVarArgFunc: false
175  localFrameSize:  0
176  savePoint:       ''
177  restorePoint:    ''
178fixedStack:      []
# Two 4-byte spill slots for the callee-saved $lr and $r4 pushed in bb.0.
179stack:
180  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
181      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
182      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
183  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
184      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
185      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
186callSites:       []
187constants:       []
188machineFunctionInfo: {}
189body:             |
190  ; CHECK-LABEL: name: test_wlstp8
191  ; CHECK: bb.0.entry:
192  ; CHECK:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
193  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r3, $r4
194  ; CHECK:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
195  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
196  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
197  ; CHECK:   frame-setup CFI_INSTRUCTION offset $r4, -8
198  ; CHECK:   $lr = MVE_WLSTP_8 killed renamable $r3, %bb.1
199  ; CHECK:   tB %bb.3, 14 /* CC::al */, $noreg
200  ; CHECK: bb.1.vector.ph:
201  ; CHECK:   successors: %bb.2(0x80000000)
202  ; CHECK:   liveins: $lr, $r0, $r1, $r2
203  ; CHECK:   renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
204  ; CHECK: bb.2.vector.body:
205  ; CHECK:   successors: %bb.3(0x04000000), %bb.2(0x7c000000)
206  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r12
207  ; CHECK:   renamable $r4 = t2ADDrr renamable $r1, renamable $r12, 14 /* CC::al */, $noreg, $noreg
208  ; CHECK:   renamable $q0 = MVE_VLDRBU8 killed renamable $r4, 0, 0, $noreg :: (load 16 from %ir.scevgep45, align 1)
209  ; CHECK:   renamable $r4 = t2ADDrr renamable $r2, renamable $r12, 14 /* CC::al */, $noreg, $noreg
210  ; CHECK:   renamable $q1 = MVE_VLDRBU8 killed renamable $r4, 0, 0, $noreg :: (load 16 from %ir.scevgep23, align 1)
211  ; CHECK:   renamable $r4 = t2ADDrr renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, $noreg
212  ; CHECK:   renamable $r12 = t2ADDri killed renamable $r12, 16, 14 /* CC::al */, $noreg, $noreg
213  ; CHECK:   renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
214  ; CHECK:   MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 0, killed $noreg :: (store 16 into %ir.scevgep1, align 1)
215  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.2
216  ; CHECK: bb.3.for.cond.cleanup:
217  ; CHECK:   tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
  ; Input to the pass starts here; everything above is expected output.
218  bb.0.entry:
219    successors: %bb.3(0x40000000), %bb.1(0x40000000)
220    liveins: $r0, $r1, $r2, $r3, $r4, $lr
221
222    frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
223    frame-setup CFI_INSTRUCTION def_cfa_offset 8
224    frame-setup CFI_INSTRUCTION offset $lr, -4
225    frame-setup CFI_INSTRUCTION offset $r4, -8
    ; The next five instructions build the iteration count in $lr from the
    ; element count in $r3; none of them appear in the expected output.
    ; NOTE(review): shift operand 35 presumably encodes "lsr #4", matching the
    ; lshr-by-4 in the IR - confirm against ARM_AM::getSORegOpc.
226    renamable $r12 = t2ADDri renamable $r3, 15, 14, $noreg, $noreg
227    renamable $lr = t2MOVi 1, 14, $noreg, $noreg
228    renamable $r12 = t2BICri killed renamable $r12, 15, 14, $noreg, $noreg
229    renamable $r12 = t2SUBri killed renamable $r12, 16, 14, $noreg, $noreg
230    renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 35, 14, $noreg, $noreg
231    t2WhileLoopStart renamable $lr, %bb.1, implicit-def dead $cpsr
232    tB %bb.3, 14, $noreg
233
234  bb.1.vector.ph:
235    successors: %bb.2(0x80000000)
236    liveins: $lr, $r0, $r1, $r2, $r3
237
238    renamable $r12 = t2MOVi 0, 14, $noreg, $noreg
239
240  bb.2.vector.body:
241    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
242    liveins: $lr, $r0, $r1, $r2, $r3, $r12
243
    ; $r12 is the running byte offset added to $r1, $r2 and $r0; $r3 is the
    ; element count consumed by MVE_VCTP8 and decremented by 16 per trip.
    ; Each predicated memory access sits in its own MVE_VPST 8 block.
244    renamable $r4 = t2ADDrr renamable $r1, renamable $r12, 14, $noreg, $noreg
245    renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg
246    MVE_VPST 8, implicit $vpr
247    renamable $q0 = MVE_VLDRBU8 killed renamable $r4, 0, 1, renamable $vpr :: (load 16 from %ir.scevgep45, align 1)
248    renamable $r4 = t2ADDrr renamable $r2, renamable $r12, 14, $noreg, $noreg
249    MVE_VPST 8, implicit $vpr
250    renamable $q1 = MVE_VLDRBU8 killed renamable $r4, 0, 1, renamable $vpr :: (load 16 from %ir.scevgep23, align 1)
251    renamable $r4 = t2ADDrr renamable $r0, renamable $r12, 14, $noreg, $noreg
252    renamable $r12 = t2ADDri killed renamable $r12, 16, 14, $noreg, $noreg
253    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg
254    renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
255    MVE_VPST 8, implicit $vpr
256    MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 1, killed renamable $vpr :: (store 16 into %ir.scevgep1, align 1)
    ; Loop back-edge pseudos; the expected output has a single MVE_LETP here.
257    renamable $lr = t2LoopDec killed renamable $lr, 1
258    t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
259    tB %bb.3, 14, $noreg
260
261  bb.3.for.cond.cleanup:
262    tPOP_RET 14, $noreg, def $r4, def $pc
263
264...
265---
# Machine function test_wlstp16 (i16 elements, 8 per iteration).
# Same shape as the i8 case but without a separate preheader block: the
# while-loop start in bb.0 targets the loop body bb.1 directly. The FileCheck
# expectations require MVE_WLSTP_16 on $r3 and MVE_LETP, with the
# iteration-count computation, MVE_VCTP16, both MVE_VPST instructions and the
# $r3 decrement removed.
266name:            test_wlstp16
267alignment:       2
268exposesReturnsTwice: false
269legalized:       false
270regBankSelected: false
271selected:        false
272failedISel:      false
273tracksRegLiveness: true
274hasWinCFI:       false
275registers:       []
276liveins:
277  - { reg: '$r0', virtual-reg: '' }
278  - { reg: '$r1', virtual-reg: '' }
279  - { reg: '$r2', virtual-reg: '' }
280  - { reg: '$r3', virtual-reg: '' }
281frameInfo:
282  isFrameAddressTaken: false
283  isReturnAddressTaken: false
284  hasStackMap:     false
285  hasPatchPoint:   false
286  stackSize:       8
287  offsetAdjustment: 0
288  maxAlignment:    4
289  adjustsStack:    false
290  hasCalls:        false
291  stackProtector:  ''
292  maxCallFrameSize: 0
293  cvBytesOfCalleeSavedRegisters: 0
294  hasOpaqueSPAdjustment: false
295  hasVAStart:      false
296  hasMustTailInVarArgFunc: false
297  localFrameSize:  0
298  savePoint:       ''
299  restorePoint:    ''
300fixedStack:      []
# Two 4-byte spill slots for the callee-saved $lr and $r7 pushed in bb.0.
301stack:
302  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
303      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
304      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
305  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
306      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
307      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
308callSites:       []
309constants:       []
310machineFunctionInfo: {}
311body:             |
312  ; CHECK-LABEL: name: test_wlstp16
313  ; CHECK: bb.0.entry:
314  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
315  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r3, $r7
316  ; CHECK:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
317  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
318  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
319  ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
320  ; CHECK:   $lr = MVE_WLSTP_16 killed renamable $r3, %bb.1
321  ; CHECK:   tB %bb.2, 14 /* CC::al */, $noreg
322  ; CHECK: bb.1.vector.body:
323  ; CHECK:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
324  ; CHECK:   liveins: $lr, $r0, $r1, $r2
325  ; CHECK:   renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 0, $noreg :: (load 16 from %ir.lsr.iv57, align 2)
326  ; CHECK:   renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 0, $noreg :: (load 16 from %ir.lsr.iv24, align 2)
327  ; CHECK:   renamable $q0 = MVE_VMULi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
328  ; CHECK:   MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 2)
329  ; CHECK:   renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14 /* CC::al */, $noreg
330  ; CHECK:   renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg
331  ; CHECK:   renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14 /* CC::al */, $noreg
332  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.1
333  ; CHECK: bb.2.for.cond.cleanup:
334  ; CHECK:   tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
  ; Input to the pass starts here; everything above is expected output.
335  bb.0.entry:
336    successors: %bb.2(0x40000000), %bb.1(0x40000000)
337    liveins: $r0, $r1, $r2, $r3, $r7, $lr
338
339    frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
340    frame-setup CFI_INSTRUCTION def_cfa_offset 8
341    frame-setup CFI_INSTRUCTION offset $lr, -4
342    frame-setup CFI_INSTRUCTION offset $r7, -8
    ; The next five instructions build the iteration count in $lr from the
    ; element count in $r3; none of them appear in the expected output.
    ; NOTE(review): shift operand 27 presumably encodes "lsr #3", matching the
    ; lshr-by-3 in the IR - confirm against ARM_AM::getSORegOpc.
343    renamable $r12 = t2ADDri renamable $r3, 7, 14, $noreg, $noreg
344    renamable $lr = t2MOVi 1, 14, $noreg, $noreg
345    renamable $r12 = t2BICri killed renamable $r12, 7, 14, $noreg, $noreg
346    renamable $r12 = t2SUBri killed renamable $r12, 8, 14, $noreg, $noreg
347    renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 27, 14, $noreg, $noreg
348    t2WhileLoopStart renamable $lr, %bb.1, implicit-def dead $cpsr
349    tB %bb.2, 14, $noreg
350
351  bb.1.vector.body:
352    successors: %bb.2(0x04000000), %bb.1(0x7c000000)
353    liveins: $lr, $r0, $r1, $r2, $r3
354
    ; Here one MVE_VPST 4 precedes both predicated loads, while the store has
    ; its own MVE_VPST 8. $r3 is the element count consumed by MVE_VCTP16 and
    ; decremented by 8 per trip; $r0/$r1/$r2 each advance by 16 bytes.
355    renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg
356    MVE_VPST 4, implicit $vpr
357    renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv57, align 2)
358    renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 2)
359    renamable $q0 = MVE_VMULi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
360    MVE_VPST 8, implicit $vpr
361    MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 2)
362    renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg
363    renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 16, 14, $noreg
364    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg
365    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14, $noreg
    ; Loop back-edge pseudos; the expected output has a single MVE_LETP here.
366    renamable $lr = t2LoopDec killed renamable $lr, 1
367    t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
368    tB %bb.2, 14, $noreg
369
370  bb.2.for.cond.cleanup:
371    tPOP_RET 14, $noreg, def $r7, def $pc
372
373...
374---
# Machine function test_wlstp32 (i32 elements, 4 per iteration, reduction).
# Unlike the i8 and i16 cases, the FileCheck expectations here keep the
# iteration-count computation, MVE_VCTP32 and the MVE_VPST blocks, and only
# require t2WhileLoopStart to become t2WLS and the t2LoopDec/t2LoopEnd/tB
# sequence to become t2LEUpdate.
# NOTE(review): presumably the tail-predicated form is not used because the
# element count is still needed after the loop (bb.3 runs MVE_VCTP32 on the
# $r3 copy) - confirm against the pass implementation.
375name:            test_wlstp32
376alignment:       2
377exposesReturnsTwice: false
378legalized:       false
379regBankSelected: false
380selected:        false
381failedISel:      false
382tracksRegLiveness: true
383hasWinCFI:       false
384registers:       []
385liveins:
386  - { reg: '$r0', virtual-reg: '' }
387  - { reg: '$r1', virtual-reg: '' }
388  - { reg: '$r2', virtual-reg: '' }
389frameInfo:
390  isFrameAddressTaken: false
391  isReturnAddressTaken: false
392  hasStackMap:     false
393  hasPatchPoint:   false
394  stackSize:       8
395  offsetAdjustment: 0
396  maxAlignment:    4
397  adjustsStack:    false
398  hasCalls:        false
399  stackProtector:  ''
400  maxCallFrameSize: 0
401  cvBytesOfCalleeSavedRegisters: 0
402  hasOpaqueSPAdjustment: false
403  hasVAStart:      false
404  hasMustTailInVarArgFunc: false
405  localFrameSize:  0
406  savePoint:       ''
407  restorePoint:    ''
408fixedStack:      []
# Two 4-byte spill slots for the callee-saved $lr and $r7 pushed in bb.0.
409stack:
410  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
411      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
412      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
413  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
414      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
415      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
416callSites:       []
417constants:       []
418machineFunctionInfo: {}
419body:             |
420  ; CHECK-LABEL: name: test_wlstp32
421  ; CHECK: bb.0.entry:
422  ; CHECK:   successors: %bb.4(0x40000000), %bb.1(0x40000000)
423  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r7
424  ; CHECK:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
425  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
426  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
427  ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
428  ; CHECK:   renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
429  ; CHECK:   renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
430  ; CHECK:   renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
431  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
432  ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
433  ; CHECK:   renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
434  ; CHECK:   $lr = t2WLS killed renamable $lr, %bb.1
435  ; CHECK:   tB %bb.4, 14 /* CC::al */, $noreg
436  ; CHECK: bb.1.vector.ph:
437  ; CHECK:   successors: %bb.2(0x80000000)
438  ; CHECK:   liveins: $lr, $r0, $r1, $r2
439  ; CHECK:   renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1
440  ; CHECK: bb.2.vector.body:
441  ; CHECK:   successors: %bb.3(0x04000000), %bb.2(0x7c000000)
442  ; CHECK:   liveins: $lr, $q1, $r0, $r1, $r2
443  ; CHECK:   $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0
444  ; CHECK:   renamable $vpr = MVE_VCTP32 $r2, 0, $noreg
445  ; CHECK:   MVE_VPST 4, implicit $vpr
446  ; CHECK:   renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4)
447  ; CHECK:   renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4)
448  ; CHECK:   $r3 = tMOVr $r2, 14 /* CC::al */, $noreg
449  ; CHECK:   renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1
450  ; CHECK:   renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14 /* CC::al */, $noreg
451  ; CHECK:   renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14 /* CC::al */, $noreg
452  ; CHECK:   renamable $r2, dead $cpsr = tSUBi8 killed $r2, 4, 14 /* CC::al */, $noreg
453  ; CHECK:   MVE_VPST 8, implicit $vpr
454  ; CHECK:   renamable $q1 = nsw MVE_VADDi32 killed renamable $q1, renamable $q0, 1, killed renamable $vpr, undef renamable $q1
455  ; CHECK:   $lr = t2LEUpdate killed renamable $lr, %bb.2
456  ; CHECK: bb.3.middle.block:
457  ; CHECK:   successors: %bb.4(0x80000000)
458  ; CHECK:   liveins: $q0, $q1, $r3
459  ; CHECK:   renamable $vpr = MVE_VCTP32 killed renamable $r3, 0, $noreg
460  ; CHECK:   renamable $q0 = MVE_VPSEL killed renamable $q1, killed renamable $q0, 0, killed renamable $vpr
461  ; CHECK:   renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg
462  ; CHECK: bb.4.for.cond.cleanup:
463  ; CHECK:   liveins: $r12
464  ; CHECK:   $r0 = tMOVr killed $r12, 14 /* CC::al */, $noreg
465  ; CHECK:   tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $r0
  ; Input to the pass starts here; everything above is expected output.
466  bb.0.entry:
467    successors: %bb.4(0x40000000), %bb.1(0x40000000)
468    liveins: $r0, $r1, $r2, $r7, $lr
469
470    frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
471    frame-setup CFI_INSTRUCTION def_cfa_offset 8
472    frame-setup CFI_INSTRUCTION offset $lr, -4
473    frame-setup CFI_INSTRUCTION offset $r7, -8
    ; Iteration count is built in $lr from the element count in $r2; these
    ; instructions are expected to survive the pass unchanged. $r12 is zeroed
    ; as the return value for the path that skips the loop (see bb.4).
    ; NOTE(review): shift operand 19 presumably encodes "lsr #2", matching the
    ; lshr-by-2 in the IR - confirm against ARM_AM::getSORegOpc.
474    renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg
475    renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg
476    renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg
477    renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg
478    renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg
479    renamable $r12 = t2MOVi 0, 14, $noreg, $noreg
480    t2WhileLoopStart renamable $lr, %bb.1, implicit-def dead $cpsr
481    tB %bb.4, 14, $noreg
482
483  bb.1.vector.ph:
484    successors: %bb.2(0x80000000)
485    liveins: $lr, $r0, $r1, $r2
486
487    renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1
488
489  bb.2.vector.body:
490    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
491    liveins: $lr, $q1, $r0, $r1, $r2
492
    ; $q0 receives the accumulator value from the previous trip (VORR of $q1
    ; with itself) and $r3 receives the element count before it is decremented;
    ; both are live into bb.3. The expected output marks the second $q1 operand
    ; of the VORR and the $vpr operand of the VADD as killed, which the input
    ; does not.
493    $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0
494    renamable $vpr = MVE_VCTP32 $r2, 0, $noreg
495    MVE_VPST 4, implicit $vpr
496    renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4)
497    renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4)
498    $r3 = tMOVr $r2, 14, $noreg
499    renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1
500    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg
501    renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg
502    renamable $r2, dead $cpsr = tSUBi8 killed $r2, 4, 14, $noreg
503    MVE_VPST 8, implicit $vpr
504    renamable $q1 = nsw MVE_VADDi32 killed renamable $q1, renamable $q0, 1, renamable $vpr, undef renamable $q1
    ; Loop back-edge pseudos; the expected output has a single t2LEUpdate here.
505    renamable $lr = t2LoopDec killed renamable $lr, 1
506    t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
507    tB %bb.3, 14, $noreg
508
509  bb.3.middle.block:
510    successors: %bb.4(0x80000000)
511    liveins: $q0, $q1, $r3
512
    ; Rebuilds the predicate from the saved count in $r3, selects between the
    ; two accumulator values ($q1 and $q0) and reduces the result into $r12.
513    renamable $vpr = MVE_VCTP32 killed renamable $r3, 0, $noreg
514    renamable $q0 = MVE_VPSEL killed renamable $q1, killed renamable $q0, 0, killed renamable $vpr
515    renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg
516
517  bb.4.for.cond.cleanup:
518    liveins: $r12
519
520    $r0 = tMOVr killed $r12, 14, $noreg
521    tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
522
523...
524