1# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o -  %s | FileCheck %s
2
3---
4# Check for awareness that s_or_saveexec_b64 clobbers SCC
5#
6#CHECK: ENTER_WWM
7#CHECK: S_CMP_LT_I32
8#CHECK: S_CSELECT_B32
# Input function for the SCC-clobber test: a scalar compare defines $scc, a
# VALU add follows, and only then does S_CSELECT_B32 read $scc. The selected
# value feeds a second VALU add whose result is wrapped in WWM and returned.
# The expected output orders ENTER_WWM ahead of the compare/select pair.
9name:            test_wwm_scc
10alignment:       1
11exposesReturnsTwice: false
12legalized:       false
13regBankSelected: false
14selected:        false
15tracksRegLiveness: true
# Virtual registers are declared up front here; the body uses %0-%3, %5, %11
# and %12, the remaining ids are declared but not referenced in the body.
16registers:
17  - { id: 0, class: sgpr_32, preferred-register: '' }
18  - { id: 1, class: sgpr_32, preferred-register: '' }
19  - { id: 2, class: sgpr_32, preferred-register: '' }
20  - { id: 3, class: vgpr_32, preferred-register: '' }
21  - { id: 4, class: vgpr_32, preferred-register: '' }
22  - { id: 5, class: sgpr_32, preferred-register: '' }
23  - { id: 6, class: vgpr_32, preferred-register: '' }
24  - { id: 7, class: vgpr_32, preferred-register: '' }
25  - { id: 8, class: sreg_32_xm0, preferred-register: '' }
26  - { id: 9, class: sreg_32, preferred-register: '' }
27  - { id: 10, class: sreg_32, preferred-register: '' }
28  - { id: 11, class: vgpr_32, preferred-register: '' }
29  - { id: 12, class: vgpr_32, preferred-register: '' }
# Three scalar inputs (compare operand and the two select candidates) plus one
# vector input.
30liveins:
31  - { reg: '$sgpr0', virtual-reg: '%0' }
32  - { reg: '$sgpr1', virtual-reg: '%1' }
33  - { reg: '$sgpr2', virtual-reg: '%2' }
34  - { reg: '$vgpr0', virtual-reg: '%3' }
# Single basic block; the instruction order (compare, VALU add, select) is
# what the test relies on, so do not reorder it.
35body:             |
36  bb.0:
37    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
38
39    %3 = COPY $vgpr0
40    %2 = COPY $sgpr2
41    %1 = COPY $sgpr1
42    %0 = COPY $sgpr0
43    S_CMP_LT_I32 0, %0, implicit-def $scc
44    %12 = V_ADD_CO_U32_e32 %3, %3, implicit-def $vcc, implicit $exec
45    %5 = S_CSELECT_B32 %2, %1, implicit $scc
46    %11 = V_ADD_CO_U32_e32 %5, %12, implicit-def $vcc, implicit $exec
47    $vgpr0 = WWM %11, implicit $exec
48    SI_RETURN_TO_EPILOG $vgpr0
49
50...
51
52---
53# V_SET_INACTIVE, when its second operand is undef, is replaced by a
54# COPY by si-wqm. Ensure the instruction is removed.
55#CHECK-NOT: V_SET_INACTIVE
# Input function with a single basic block (no control flow). It loads two
# dwords from a buffer, passes one of them through V_SET_INACTIVE_B32 whose
# second operand is an undef register, then through a DPP move and WWM, and
# stores the result back to the same buffer resource.
56name:            no_cfg
57alignment:       1
58exposesReturnsTwice: false
59legalized:       false
60regBankSelected: false
61selected:        false
62failedISel:      false
63tracksRegLiveness: true
64hasWinCFI:       false
# Register classes are declared here and repeated inline on each def in the
# body. %4 and %9 are declared but not referenced in the body; %12 is only
# ever used as an undef operand.
65registers:
66  - { id: 0, class: sgpr_32, preferred-register: '' }
67  - { id: 1, class: sgpr_32, preferred-register: '' }
68  - { id: 2, class: sgpr_32, preferred-register: '' }
69  - { id: 3, class: sgpr_32, preferred-register: '' }
70  - { id: 4, class: sgpr_32, preferred-register: '' }
71  - { id: 5, class: sgpr_128, preferred-register: '' }
72  - { id: 6, class: sgpr_128, preferred-register: '' }
73  - { id: 7, class: sreg_32, preferred-register: '' }
74  - { id: 8, class: vreg_64, preferred-register: '' }
75  - { id: 9, class: sreg_32, preferred-register: '' }
76  - { id: 10, class: vgpr_32, preferred-register: '' }
77  - { id: 11, class: vgpr_32, preferred-register: '' }
78  - { id: 12, class: sreg_32, preferred-register: '' }
79  - { id: 13, class: vgpr_32, preferred-register: '' }
80  - { id: 14, class: vgpr_32, preferred-register: '' }
81  - { id: 15, class: vgpr_32, preferred-register: '' }
82  - { id: 16, class: vgpr_32, preferred-register: '' }
# The four scalar inputs are assembled into the 128-bit value used as the
# buffer resource operand of the load and the store.
83liveins:
84  - { reg: '$sgpr0', virtual-reg: '%0' }
85  - { reg: '$sgpr1', virtual-reg: '%1' }
86  - { reg: '$sgpr2', virtual-reg: '%2' }
87  - { reg: '$sgpr3', virtual-reg: '%3' }
# The V_SET_INACTIVE_B32 with the undef second operand is the instruction
# under test; it must not survive in the pass output.
88body:             |
89  bb.0:
90    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
91
92    %3:sgpr_32 = COPY $sgpr3
93    %2:sgpr_32 = COPY $sgpr2
94    %1:sgpr_32 = COPY $sgpr1
95    %0:sgpr_32 = COPY $sgpr0
96    %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
97    %5:sgpr_128 = COPY %6
98    %7:sreg_32 = S_MOV_B32 0
99    %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, 0, 0, 0, implicit $exec
100    %16:vgpr_32 = COPY %8.sub1
101    %11:vgpr_32 = COPY %16
102    %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec
103    %14:vgpr_32 = COPY %7
104    %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
105    early-clobber %15:vgpr_32 = WWM killed %13, implicit $exec
106    BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, 0, 0, 0, implicit $exec
107    S_ENDPGM 0
108
109...
110
111---
112# Ensure that wwm is not put around an EXEC copy
113#CHECK-LABEL: name: copy_exec
114#CHECK: %7:sreg_64 = COPY $exec
115#CHECK-NEXT: %14:sreg_64 = ENTER_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
116#CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
117#CHECK-NEXT: $exec = EXIT_WWM %14
118#CHECK-NEXT: %9:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %7.sub0, 0, implicit $exec
# Input function that copies $exec into a virtual register and later reads
# its low half in V_MBCNT_LO_U32_B32_e64, alongside a V_MOV_B32_e32 whose
# result is the WWM operand. There is no separate register list; every
# virtual register class is given inline at its definition.
# NOTE(review): the expected-output lines above use different virtual register
# numbers than this input (the $exec copy is %8 here but %7 there) -
# presumably registers are renumbered when the MIR is parsed/printed; confirm
# before changing any register ids in this body.
119name:            copy_exec
120tracksRegLiveness: true
# Single basic block: buffer resource setup and load, the $exec copy and the
# WWM-related sequence, then a store of the WWM result.
121body:             |
122  bb.0:
123    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
124
125    %3:sgpr_32 = COPY $sgpr3
126    %2:sgpr_32 = COPY $sgpr2
127    %1:sgpr_32 = COPY $sgpr1
128    %0:sgpr_32 = COPY $sgpr0
129    %4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
130    %5:sreg_32 = S_MOV_B32 0
131    %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, 0, 0, 0, implicit $exec
132
133    %8:sreg_64 = COPY $exec
134    %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
135    %10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
136    %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
137    %12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
138    early-clobber %13:sreg_32 = WWM %9:vgpr_32, implicit $exec
139
140    %14:vgpr_32 = COPY %13
141    BUFFER_STORE_DWORD_OFFSET_exact killed %14, %4, %5, 4, 0, 0, 0, 0, 0, implicit $exec
142    S_ENDPGM 0
143
144...
145
146---
147# Check exit of WQM is still inserted correctly when SCC is live until block end.
148# Critically, this tests that compilation does not fail.
149#CHECK-LABEL: name: scc_always_live
150#CHECK: %8:vreg_128 = IMAGE_SAMPLE_V4_V2 %7
151#CHECK-NEXT: S_CMP_EQ_U32 %2, 0, implicit-def $scc
152#CHECK-NEXT: undef %9.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64
153#CHECK-NEXT: %9.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32
154#CHECK-NEXT: %14:sreg_32_xm0 = COPY $scc
155#CHECK-NEXT: $exec = S_AND_B64 $exec, %13, implicit-def $scc
156#CHECK-NEXT: $scc = COPY %14
157#CHECK-NEXT: %10:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64
158#CHECK-NEXT: %11:vreg_128 = IMAGE_SAMPLE_V4_V2
159#CHECK-NEXT: S_CBRANCH_SCC0 %bb.2
# Input function with three basic blocks. In bb.0 the scalar compare defines
# $scc after the first image sample, and $scc is not read again until the
# S_CBRANCH_SCC0 that terminates the block, so it stays live across the VALU
# arithmetic and the second image sample. The expected output saves $scc into
# a register before the S_AND_B64 on $exec and restores it afterwards.
# bb.1 stores %6 to a buffer and ends the program; bb.2 returns components of
# both image-sample results.
160name:            scc_always_live
161tracksRegLiveness: true
# Virtual register classes are given inline; the sampler/resource operands
# (%100, %101) are IMPLICIT_DEF placeholders.
162body:             |
163  bb.0:
164    liveins: $sgpr1, $sgpr2, $vgpr1, $vgpr2
165
166    $m0 = COPY $sgpr1
167    %0:vgpr_32 = COPY $vgpr1
168    %1:vgpr_32 = COPY $vgpr2
169    %8:sgpr_32 = COPY $sgpr2
170    %100:sgpr_256 = IMPLICIT_DEF
171    %101:sgpr_128 = IMPLICIT_DEF
172
173    %2:vgpr_32 = V_INTERP_P1_F32 %0:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec
174    %3:vgpr_32 = V_INTERP_P1_F32 %1:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec
175
176    undef %7.sub0:vreg_64 = COPY %2:vgpr_32
177    %7.sub1:vreg_64 = COPY %3:vgpr_32
178
179    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
180    S_CMP_EQ_U32 %8:sgpr_32, 0, implicit-def $scc
181
182    undef %5.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 0, %4.sub0:vreg_128, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
183    %5.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 %2, %3, implicit $mode, implicit $exec
184    %6:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 0, %2:vgpr_32, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
185
186    %9:vreg_128 = IMAGE_SAMPLE_V4_V2 %5:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
187
188    S_CBRANCH_SCC0 %bb.2, implicit $scc
189
190  bb.1:
191    %10:sreg_32 = S_MOV_B32 0
192    BUFFER_STORE_DWORD_OFFSET_exact %6:vgpr_32, %101:sgpr_128, %10:sreg_32, 4, 0, 0, 0, 0, 0, implicit $exec
193    S_ENDPGM 0
194
195  bb.2:
196    $vgpr0 = COPY %4.sub0:vreg_128
197    $vgpr1 = COPY %4.sub1:vreg_128
198    $vgpr2 = COPY %9.sub0:vreg_128
199    $vgpr3 = COPY %9.sub1:vreg_128
200    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3
201...
202