1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O3 -mtriple=thumbv8.1m.main-none-none-eabi --verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
3
; Predicated MVE ORR intrinsic on <4 x i32>, called by the test functions in
; this file. Operands, in order: two <4 x i32> inputs, a <4 x i1> mask and one
; more <4 x i32> value.
; NOTE(review): the last operand looks like the value kept in lanes where the
; mask is false (its register is reused as the destination in the expected
; assembly) - confirm against the intrinsic definition.
4declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
5
; One predicated ORR of %b and %c under the mask (%a sge %c), with %a as the
; fourth operand. The expected assembly is a VPT block holding a single
; "then" instruction (vorrt).
6define arm_aapcs_vfpcc <4 x i32> @vpt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
7; CHECK-LABEL: vpt_block:
8; CHECK:       @ %bb.0: @ %entry
9; CHECK-NEXT:    vpt.s32 ge, q0, q2
10; CHECK-NEXT:    vorrt q0, q1, q2
11; CHECK-NEXT:    bx lr
12entry:
13  %0 = icmp sge <4 x i32> %a, %c
14  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
15  ret <4 x i32> %1
16}
17
; Two predicated ORRs under the same mask (%a sge %c). The second takes the
; first one's result as its first input, and both pass %a as the fourth
; operand. The expected assembly copies q0 into q3 first, then emits a VPTT
; block with two "then" ORRs (the first writing q3, the second writing q0).
18define arm_aapcs_vfpcc <4 x i32> @vptt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
19; CHECK-LABEL: vptt_block:
20; CHECK:       @ %bb.0: @ %entry
21; CHECK-NEXT:    vmov q3, q0
22; CHECK-NEXT:    vptt.s32 ge, q0, q2
23; CHECK-NEXT:    vorrt q3, q1, q2
24; CHECK-NEXT:    vorrt q0, q3, q2
25; CHECK-NEXT:    bx lr
26entry:
27  %0 = icmp sge <4 x i32> %a, %c
28  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
29  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %1, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
30  ret <4 x i32> %2
31}
32
; Three predicated ORRs of %b and %c under the same mask (%a sge %c). Each
; call after the first passes the previous result as its fourth operand.
; The expected assembly is one VPTTT block with three "then" ORRs.
33define arm_aapcs_vfpcc <4 x i32> @vpttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
34; CHECK-LABEL: vpttt_block:
35; CHECK:       @ %bb.0: @ %entry
36; CHECK-NEXT:    vpttt.s32 ge, q0, q2
37; CHECK-NEXT:    vorrt q0, q1, q2
38; CHECK-NEXT:    vorrt q0, q1, q2
39; CHECK-NEXT:    vorrt q0, q1, q2
40; CHECK-NEXT:    bx lr
41entry:
42  %0 = icmp sge <4 x i32> %a, %c
43  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
44  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
45  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
46  ret <4 x i32> %3
47}
48
; Four predicated ORRs of %b and %c under the same mask (%a sge %c), chained
; through the fourth operand. The expected assembly is one VPTTTT block with
; four "then" ORRs.
49define arm_aapcs_vfpcc <4 x i32> @vptttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
50; CHECK-LABEL: vptttt_block:
51; CHECK:       @ %bb.0: @ %entry
52; CHECK-NEXT:    vptttt.s32 ge, q0, q2
53; CHECK-NEXT:    vorrt q0, q1, q2
54; CHECK-NEXT:    vorrt q0, q1, q2
55; CHECK-NEXT:    vorrt q0, q1, q2
56; CHECK-NEXT:    vorrt q0, q1, q2
57; CHECK-NEXT:    bx lr
58entry:
59  %0 = icmp sge <4 x i32> %a, %c
60  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
61  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
62  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
63  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
64  ret <4 x i32> %4
65}
66
67
; Mask pattern: then, else. One predicated ORR of %b and %c under the mask
; (%a sge %c), followed by a predicated ORR of %c with itself under the
; inverted mask. The expected assembly is a VPTE block; the self-ORR appears
; there as a predicated move (vmove).
68define arm_aapcs_vfpcc <4 x i32> @vpte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
69; CHECK-LABEL: vpte_block:
70; CHECK:       @ %bb.0: @ %entry
71; CHECK-NEXT:    vpte.s32 ge, q0, q2
72; CHECK-NEXT:    vorrt q0, q1, q2
73; CHECK-NEXT:    vmove q0, q2
74; CHECK-NEXT:    bx lr
75entry:
76  %0 = icmp sge <4 x i32> %a, %c
77  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
78  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
79  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
80  ret <4 x i32> %3
81}
82
; Mask pattern: then, then, else. Three predicated ORRs of %b and %c, the
; first two under the mask (%a sge %c) and the last under the inverted mask.
; Each call after the first passes the previous result as its fourth
; operand. The expected assembly is a VPTTE block with vorrt, vorrt, vorre.
83define arm_aapcs_vfpcc <4 x i32> @vptte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
84; CHECK-LABEL: vptte_block:
85; CHECK:       @ %bb.0: @ %entry
86; CHECK-NEXT:    vptte.s32 ge, q0, q2
87; CHECK-NEXT:    vorrt q0, q1, q2
88; CHECK-NEXT:    vorrt q0, q1, q2
89; CHECK-NEXT:    vorre q0, q1, q2
90; CHECK-NEXT:    bx lr
91entry:
92  %0 = icmp sge <4 x i32> %a, %c
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
93  %1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
94  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
95  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
96  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
97  ret <4 x i32> %4
98}
99
; Mask pattern: then, else, else. Three predicated ORRs of %b and %c, the
; first under the mask (%a sge %c) and the next two under the inverted mask.
; Each call after the first passes the previous result as its fourth
; operand. The expected assembly is a VPTEE block with vorrt, vorre, vorre.
100define arm_aapcs_vfpcc <4 x i32> @vptee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
101; CHECK-LABEL: vptee_block:
102; CHECK:       @ %bb.0: @ %entry
103; CHECK-NEXT:    vptee.s32 ge, q0, q2
104; CHECK-NEXT:    vorrt q0, q1, q2
105; CHECK-NEXT:    vorre q0, q1, q2
106; CHECK-NEXT:    vorre q0, q1, q2
107; CHECK-NEXT:    bx lr
108entry:
109  %0 = icmp sge <4 x i32> %a, %c
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
110  %1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
111  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
112  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %2)
113  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
114  ret <4 x i32> %4
115}
116
; Mask pattern: then, else, then. A predicated ORR of %b and %c under the
; mask (%a sge %c), then two predicated ORRs of %c with itself: one under the
; inverted mask, one under the original mask. The expected assembly is a
; VPTET block with vorrt, vmove, vmovt (the self-ORRs appear as moves).
117define arm_aapcs_vfpcc <4 x i32> @vptet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
118; CHECK-LABEL: vptet_block:
119; CHECK:       @ %bb.0: @ %entry
120; CHECK-NEXT:    vptet.s32 ge, q0, q2
121; CHECK-NEXT:    vorrt q0, q1, q2
122; CHECK-NEXT:    vmove q0, q2
123; CHECK-NEXT:    vmovt q0, q2
124; CHECK-NEXT:    bx lr
125entry:
126  %0 = icmp sge <4 x i32> %a, %c
127  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
128  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
129  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
130  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
131  ret <4 x i32> %4
132}
133
; Mask pattern: then, then, else, then. A predicated ORR of %b and %c under
; the mask (%a sge %c), then three predicated ORRs of %c with itself under
; the mask, the inverted mask and the mask again. The expected assembly is a
; VPTTET block with vorrt, vmovt, vmove, vmovt.
134define arm_aapcs_vfpcc <4 x i32> @vpttet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
135; CHECK-LABEL: vpttet_block:
136; CHECK:       @ %bb.0: @ %entry
137; CHECK-NEXT:    vpttet.s32 ge, q0, q2
138; CHECK-NEXT:    vorrt q0, q1, q2
139; CHECK-NEXT:    vmovt q0, q2
140; CHECK-NEXT:    vmove q0, q2
141; CHECK-NEXT:    vmovt q0, q2
142; CHECK-NEXT:    bx lr
143entry:
144  %0 = icmp sge <4 x i32> %a, %c
145  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
146  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
147  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
148  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
149  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
150  ret <4 x i32> %5
151}
152
; Mask pattern: then, else, then, then. A predicated ORR of %b and %c under
; the mask (%a sge %c), then three predicated ORRs of %c with itself under
; the inverted mask, the mask and the mask again. The expected assembly is a
; VPTETT block with vorrt, vmove, vmovt, vmovt.
153define arm_aapcs_vfpcc <4 x i32> @vptett_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
154; CHECK-LABEL: vptett_block:
155; CHECK:       @ %bb.0: @ %entry
156; CHECK-NEXT:    vptett.s32 ge, q0, q2
157; CHECK-NEXT:    vorrt q0, q1, q2
158; CHECK-NEXT:    vmove q0, q2
159; CHECK-NEXT:    vmovt q0, q2
160; CHECK-NEXT:    vmovt q0, q2
161; CHECK-NEXT:    bx lr
162entry:
163  %0 = icmp sge <4 x i32> %a, %c
164  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
165  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
166  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
167  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
168  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
169  ret <4 x i32> %5
170}
171
; Mask pattern: then, else, else, then. A predicated ORR of %b and %c under
; the mask (%a sge %c), then three predicated ORRs of %c with itself under
; the inverted mask twice and then the original mask. The expected assembly
; is a VPTEET block with vorrt, vmove, vmove, vmovt.
172define arm_aapcs_vfpcc <4 x i32> @vpteet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
173; CHECK-LABEL: vpteet_block:
174; CHECK:       @ %bb.0: @ %entry
175; CHECK-NEXT:    vpteet.s32 ge, q0, q2
176; CHECK-NEXT:    vorrt q0, q1, q2
177; CHECK-NEXT:    vmove q0, q2
178; CHECK-NEXT:    vmove q0, q2
179; CHECK-NEXT:    vmovt q0, q2
180; CHECK-NEXT:    bx lr
181entry:
182  %0 = icmp sge <4 x i32> %a, %c
183  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
184  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
185  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
186  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
187  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
188  ret <4 x i32> %5
189}
190
; Mask pattern: then, else, else, else. A predicated ORR of %b and %c under
; the mask (%a sge %c), then three predicated ORRs of %c with itself, all
; under the inverted mask. The expected assembly is a VPTEEE block with
; vorrt followed by three vmove.
191define arm_aapcs_vfpcc <4 x i32> @vpteee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
192; CHECK-LABEL: vpteee_block:
193; CHECK:       @ %bb.0: @ %entry
194; CHECK-NEXT:    vpteee.s32 ge, q0, q2
195; CHECK-NEXT:    vorrt q0, q1, q2
196; CHECK-NEXT:    vmove q0, q2
197; CHECK-NEXT:    vmove q0, q2
198; CHECK-NEXT:    vmove q0, q2
199; CHECK-NEXT:    bx lr
200entry:
201  %0 = icmp sge <4 x i32> %a, %c
202  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
203  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
204  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
205  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
206  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
207  ret <4 x i32> %5
208}
209
; Mask pattern: then, else, then, else. A predicated ORR of %b and %c under
; the mask (%a sge %c), then three predicated ORRs of %c with itself that
; alternate between the inverted mask and the original mask. The expected
; assembly is a VPTETE block with vorrt, vmove, vmovt, vmove.
210define arm_aapcs_vfpcc <4 x i32> @vptete_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
211; CHECK-LABEL: vptete_block:
212; CHECK:       @ %bb.0: @ %entry
213; CHECK-NEXT:    vptete.s32 ge, q0, q2
214; CHECK-NEXT:    vorrt q0, q1, q2
215; CHECK-NEXT:    vmove q0, q2
216; CHECK-NEXT:    vmovt q0, q2
217; CHECK-NEXT:    vmove q0, q2
218; CHECK-NEXT:    bx lr
219entry:
220  %0 = icmp sge <4 x i32> %a, %c
221  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
222  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
223  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
224  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
225  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
226  ret <4 x i32> %5
227}
228
; Mask pattern: then, then, then, else. A predicated ORR of %b and %c under
; the mask (%a sge %c), then three predicated ORRs of %c with itself: two
; under the mask and the last under the inverted mask. The expected assembly
; is a VPTTTE block with vorrt, vmovt, vmovt, vmove.
229define arm_aapcs_vfpcc <4 x i32> @vpttte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
230; CHECK-LABEL: vpttte_block:
231; CHECK:       @ %bb.0: @ %entry
232; CHECK-NEXT:    vpttte.s32 ge, q0, q2
233; CHECK-NEXT:    vorrt q0, q1, q2
234; CHECK-NEXT:    vmovt q0, q2
235; CHECK-NEXT:    vmovt q0, q2
236; CHECK-NEXT:    vmove q0, q2
237; CHECK-NEXT:    bx lr
238entry:
239  %0 = icmp sge <4 x i32> %a, %c
240  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
241  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
242  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
243  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
244  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
245  ret <4 x i32> %5
246}
247
; Mask pattern: then, then, else, else. A predicated ORR of %b and %c under
; the mask (%a sge %c), then three predicated ORRs of %c with itself: one
; under the mask and two under the inverted mask. The expected assembly is a
; VPTTEE block with vorrt, vmovt, vmove, vmove.
248define arm_aapcs_vfpcc <4 x i32> @vpttee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
249; CHECK-LABEL: vpttee_block:
250; CHECK:       @ %bb.0: @ %entry
251; CHECK-NEXT:    vpttee.s32 ge, q0, q2
252; CHECK-NEXT:    vorrt q0, q1, q2
253; CHECK-NEXT:    vmovt q0, q2
254; CHECK-NEXT:    vmove q0, q2
255; CHECK-NEXT:    vmove q0, q2
256; CHECK-NEXT:    bx lr
257entry:
258  %0 = icmp sge <4 x i32> %a, %c
259  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ; XOR with all-true inverts the compare mask; it drives the "else" lane set.
260  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
261  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
262  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
263  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
264  ret <4 x i32> %5
265}
266