1# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-lower-control-flow -o - %s | FileCheck %s
2# Getting an undef that is specifically a VGPR is tricky from IR, so the undef operands are written directly in the MIR below.
3
4# CHECK-LABEL: name: extract_undef_offset_vgpr{{$}}
5# CHECK: bb.1:
6# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
7# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
8
9# CHECK: V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
10# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
11# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
12
13# CHECK: bb.2:
14# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
15
16
17--- |
  ; Embedded LLVM IR module. It supplies the IR-level functions that the MIR
  ; documents below are named after; memory operands such as %ir.in / %ir.out
  ; in the MIR refer to the arguments declared here.
18  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
19
  ; Volatile load of a <4 x i32>, extractelement with an undef index, store of
  ; the extracted i32.
20  define void @extract_undef_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
21  entry:
22    %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
23    %value = extractelement <4 x i32> %ld, i32 undef
24    store i32 %value, i32 addrspace(1)* %out
25    ret void
26  }
27
  ; Same IR body as @extract_undef_offset_vgpr; the MIR document of this name
  ; uses an immediate offset of -7 on its SI_INDIRECT_SRC_V4.
28  define void @extract_undef_neg_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
29  entry:
30    %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
31    %value = extractelement <4 x i32> %ld, i32 undef
32    store i32 %value, i32 addrspace(1)* %out
33    ret void
34  }
35
  ; Load of a <4 x i32>, insertelement of the constant 5 at an undef index,
  ; store of the resulting vector.
36  define void @insert_undef_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
37  entry:
38    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
39    %value = insertelement <4 x i32> %ld, i32 5, i32 undef
40    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
41    ret void
42  }
43
  ; Same IR body as @insert_undef_offset_vgpr; the MIR document of this name
  ; uses an immediate offset of -7 on its SI_INDIRECT_DST_V4.
44  define void @insert_undef_neg_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
45  entry:
46    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
47    %value = insertelement <4 x i32> %ld, i32 5, i32 undef
48    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
49    ret void
50  }
51
  ; Here the inserted value is undef and the index is the %idx argument.
52  define void @insert_undef_value_offset_vgpr(<4 x i32> addrspace(1)*%out, <4 x i32> addrspace(1)* %in, i32 %idx) {
53  entry:
54    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
55    %value = insertelement <4 x i32> %ld, i32 undef, i32 %idx
56    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
57    ret void
58  }
59
60...
61---
# Machine function fed to -run-pass si-lower-control-flow: a single block whose
# SI_INDIRECT_SRC_V4 takes "undef %vgpr10" as its register operand and 0 as its
# immediate. The CHECK lines for this name expect the undef flag to survive
# onto the V_READFIRSTLANE_B32 in the generated loop block (bb.1).
62name:            extract_undef_offset_vgpr
63alignment:       0
64exposesReturnsTwice: false
65hasInlineAsm:    false
66allVRegsAllocated: true
67isSSA:           false
68tracksRegLiveness: true
69tracksSubRegLiveness: true
70liveins:
71  - { reg: '%sgpr0_sgpr1' }
72frameInfo:
73  isFrameAddressTaken: false
74  isReturnAddressTaken: false
75  hasStackMap:     false
76  hasPatchPoint:   false
77  stackSize:       0
78  offsetAdjustment: 0
79  maxAlignment:    0
80  adjustsStack:    false
81  hasCalls:        false
82  maxCallFrameSize: 0
83  hasOpaqueSPAdjustment: false
84  hasVAStart:      false
85  hasMustTailInVarArgFunc: false
86body:             |
87  bb.0.entry:
88    liveins: %sgpr0_sgpr1
89
90    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
91    %sgpr7 = S_MOV_B32 61440
92    %sgpr6 = S_MOV_B32 -1
93    S_WAITCNT 127
94    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
95    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
96    S_WAITCNT 3952
    ; Instruction under test: %vgpr10 is never defined in this function and is
    ; explicitly marked undef.
97    %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
98    S_WAITCNT 127
99    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
100    S_ENDPGM
101
102...
103
104# CHECK-LABEL: name: extract_undef_neg_offset_vgpr{{$}}
105# CHECK: bb.1:
106# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
107# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
108
109# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
110# CHECK: %m0 = S_MOV_B32 %vcc_lo
111# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
112# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
113# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
114
115# CHECK: bb.2:
116# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
117
# Machine function fed to -run-pass si-lower-control-flow: same instruction
# sequence as extract_undef_offset_vgpr, except that the SI_INDIRECT_SRC_V4
# immediate is -7. The CHECK lines for this name expect that immediate to show
# up as "S_ADD_I32 %m0, -7" in the generated loop block (bb.1).
118name:            extract_undef_neg_offset_vgpr
119alignment:       0
120exposesReturnsTwice: false
121hasInlineAsm:    false
122allVRegsAllocated: true
123isSSA:           false
124tracksRegLiveness: true
125tracksSubRegLiveness: true
126liveins:
127  - { reg: '%sgpr0_sgpr1' }
128frameInfo:
129  isFrameAddressTaken: false
130  isReturnAddressTaken: false
131  hasStackMap:     false
132  hasPatchPoint:   false
133  stackSize:       0
134  offsetAdjustment: 0
135  maxAlignment:    0
136  adjustsStack:    false
137  hasCalls:        false
138  maxCallFrameSize: 0
139  hasOpaqueSPAdjustment: false
140  hasVAStart:      false
141  hasMustTailInVarArgFunc: false
142body:             |
143  bb.0.entry:
144    liveins: %sgpr0_sgpr1
145
146    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
147    %sgpr7 = S_MOV_B32 61440
148    %sgpr6 = S_MOV_B32 -1
149    S_WAITCNT 127
150    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
151    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
152    S_WAITCNT 3952
    ; Instruction under test: undef %vgpr10 register operand, immediate -7.
153    %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
154    S_WAITCNT 127
155    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
156    S_ENDPGM
157
158...
159
160# CHECK-LABEL: name: insert_undef_offset_vgpr{{$}}
161# CHECK: bb.1:
162# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
163# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
164
165# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
166# CHECK: %m0 = S_MOV_B32 %vcc_lo
167# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
168# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
169
170# CHECK: bb.2:
171# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
172
# Machine function fed to -run-pass si-lower-control-flow: an
# SI_INDIRECT_DST_V4 that writes %vgpr4 (set to 5 by V_MOV_B32_e32) into
# %vgpr0_vgpr1_vgpr2_vgpr3, with "undef %vgpr10" as its register operand and 0
# as its immediate. The CHECK lines for this name expect the undef flag to
# survive onto the V_READFIRSTLANE_B32 in the generated loop block (bb.1).
173name:            insert_undef_offset_vgpr
174alignment:       0
175exposesReturnsTwice: false
176hasInlineAsm:    false
177allVRegsAllocated: true
178isSSA:           false
179tracksRegLiveness: true
180tracksSubRegLiveness: true
181liveins:
182  - { reg: '%sgpr0_sgpr1' }
183frameInfo:
184  isFrameAddressTaken: false
185  isReturnAddressTaken: false
186  hasStackMap:     false
187  hasPatchPoint:   false
188  stackSize:       0
189  offsetAdjustment: 0
190  maxAlignment:    0
191  adjustsStack:    false
192  hasCalls:        false
193  maxCallFrameSize: 0
194  hasOpaqueSPAdjustment: false
195  hasVAStart:      false
196  hasMustTailInVarArgFunc: false
197body:             |
198  bb.0.entry:
199    liveins: %sgpr0_sgpr1
200
201    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
202    %sgpr7 = S_MOV_B32 61440
203    %sgpr6 = S_MOV_B32 -1
204    %vgpr4 = V_MOV_B32_e32 5, implicit %exec
205    S_WAITCNT 127
206    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
207    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
208    S_WAITCNT 3952
    ; Instruction under test: undef %vgpr10 register operand, immediate 0,
    ; inserted value %vgpr4.
209    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
210    S_WAITCNT 127
211    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
212    S_ENDPGM
213
214...
215
216# CHECK-LABEL: name: insert_undef_neg_offset_vgpr{{$}}
217# CHECK: bb.1:
218# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
219# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
220
221# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
222# CHECK: %m0 = S_MOV_B32 %vcc_lo
223# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
224# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
225# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
226
227# CHECK: bb.2:
228# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
229
# Machine function fed to -run-pass si-lower-control-flow: same instruction
# sequence as insert_undef_offset_vgpr, except that the SI_INDIRECT_DST_V4
# immediate is -7. The CHECK lines for this name expect that immediate to show
# up as "S_ADD_I32 %m0, -7" in the generated loop block (bb.1).
230name:            insert_undef_neg_offset_vgpr
231alignment:       0
232exposesReturnsTwice: false
233hasInlineAsm:    false
234allVRegsAllocated: true
235isSSA:           false
236tracksRegLiveness: true
237tracksSubRegLiveness: true
238liveins:
239  - { reg: '%sgpr0_sgpr1' }
240frameInfo:
241  isFrameAddressTaken: false
242  isReturnAddressTaken: false
243  hasStackMap:     false
244  hasPatchPoint:   false
245  stackSize:       0
246  offsetAdjustment: 0
247  maxAlignment:    0
248  adjustsStack:    false
249  hasCalls:        false
250  maxCallFrameSize: 0
251  hasOpaqueSPAdjustment: false
252  hasVAStart:      false
253  hasMustTailInVarArgFunc: false
254body:             |
255  bb.0.entry:
256    liveins: %sgpr0_sgpr1
257
258    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
259    %sgpr7 = S_MOV_B32 61440
260    %sgpr6 = S_MOV_B32 -1
261    %vgpr4 = V_MOV_B32_e32 5, implicit %exec
262    S_WAITCNT 127
263    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
264    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
265    S_WAITCNT 3952
    ; Instruction under test: undef %vgpr10 register operand, immediate -7,
    ; inserted value %vgpr4.
266    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
267    S_WAITCNT 127
268    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
269    S_ENDPGM
270
271...
272
273# CHECK-LABEL: name: insert_undef_value_offset_vgpr{{$}}
274# CHECK: bb.1:
275# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
276# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
277
278# CHECK: %vcc_lo = V_READFIRSTLANE_B32 %vgpr4, implicit %exec
279# CHECK: %m0 = S_MOV_B32 %vcc_lo
280# CHECK: %vgpr0 = V_MOVRELD_B32_e32 undef %vgpr10, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
281# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
282
283# CHECK: bb.2:
284# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
285
# Machine function fed to -run-pass si-lower-control-flow: here the roles are
# swapped relative to insert_undef_offset_vgpr. The SI_INDIRECT_DST_V4 takes
# the defined register %vgpr4 (set to 2 by V_MOV_B32_e32) in the position that
# held "undef %vgpr10" there, and "undef %vgpr10" as the inserted value. The
# CHECK lines for this name expect V_READFIRSTLANE_B32 to read %vgpr4 and
# V_MOVRELD_B32_e32 to keep the undef flag on %vgpr10.
286name:            insert_undef_value_offset_vgpr
287alignment:       0
288exposesReturnsTwice: false
289hasInlineAsm:    false
290allVRegsAllocated: true
291isSSA:           false
292tracksRegLiveness: true
293tracksSubRegLiveness: true
294liveins:
295  - { reg: '%sgpr0_sgpr1' }
296frameInfo:
297  isFrameAddressTaken: false
298  isReturnAddressTaken: false
299  hasStackMap:     false
300  hasPatchPoint:   false
301  stackSize:       0
302  offsetAdjustment: 0
303  maxAlignment:    0
304  adjustsStack:    false
305  hasCalls:        false
306  maxCallFrameSize: 0
307  hasOpaqueSPAdjustment: false
308  hasVAStart:      false
309  hasMustTailInVarArgFunc: false
310body:             |
311  bb.0.entry:
312    liveins: %sgpr0_sgpr1
313
314    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
315    %sgpr7 = S_MOV_B32 61440
316    %sgpr6 = S_MOV_B32 -1
317    %vgpr4 = V_MOV_B32_e32 2, implicit %exec
318    S_WAITCNT 127
319    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
320    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
321    S_WAITCNT 3952
    ; Instruction under test: defined register operand %vgpr4, immediate 0,
    ; inserted value undef %vgpr10.
322    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, killed %vgpr4, 0, undef %vgpr10, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
323    S_WAITCNT 127
324    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
325    S_ENDPGM
326
327...
328