1# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s
2
3# Check that SILoadStoreOptimizer honors physical-register (physreg) defs and
4# uses that lie between the instructions it moves.
5#
6# The following IR snippet would usually be optimized by the peephole optimizer.
7# However, an equivalent situation can occur with buffer instructions as well.
8
9# CHECK-LABEL: name: scc_def_and_use_no_dependency
10# CHECK: S_ADD_U32
11# CHECK: S_ADDC_U32
12# CHECK: DS_READ2_B32
13--- |
  ; Embedded LLVM IR module. Both functions are stubs with no loads of their
  ; own; the machine-function bodies in this file refer to their named values
  ; through the %ir.<name> memory operands (%ir.ptr.0 and %ir.ptr.64).
  ;
  ; %ptr.64 is i32 element 16 of %ptr.0, i.e. byte offset 64, which matches the
  ; offset operand of the second DS_READ_B32 in each body. %ptr.4 (element 1,
  ; byte offset 4) is not referenced by any memory operand shown in this file.
14  define amdgpu_kernel void @scc_def_and_use_no_dependency(i32 addrspace(3)* %ptr.0) nounwind {
15    %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
16    %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
17    ret void
18  }
19
20  define amdgpu_kernel void @scc_def_and_use_dependency(i32 addrspace(3)* %ptr.0) nounwind {
21    %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
22    %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
23    ret void
24  }
25...
26---
# Machine function in which the scalar pair sitting between the two
# DS_READ_B32 instructions defines and uses $scc but does not consume the
# result of either read. The CHECK lines for this function expect S_ADD_U32
# and S_ADDC_U32 to appear before the DS_READ2_B32 in the pass output.
27name:            scc_def_and_use_no_dependency
28alignment:       0
29exposesReturnsTwice: false
30legalized:       false
31regBankSelected: false
32selected:        false
33tracksRegLiveness: false
34liveins:
35  - { reg: '$vgpr0' }
36  - { reg: '$sgpr0' }
37frameInfo:
38  isFrameAddressTaken: false
39  isReturnAddressTaken: false
40  hasStackMap:     false
41  hasPatchPoint:   false
42  stackSize:       0
43  offsetAdjustment: 0
44  maxAlignment:    0
45  adjustsStack:    false
46  hasCalls:        false
47  maxCallFrameSize: 0
48  hasOpaqueSPAdjustment: false
49  hasVAStart:      false
50  hasMustTailInVarArgFunc: false
51body:             |
52  bb.0:
53    liveins: $vgpr0, $sgpr0
54
    ; %1 is the register operand shared by both DS reads; %10 feeds the
    ; scalar adds.
55    %1:vgpr_32 = COPY $vgpr0
56    %10:sgpr_32 = COPY $sgpr0
57
    ; Both DS reads carry an implicit use of $m0.
58    $m0 = S_MOV_B32 -1
    ; First read: offset 0.
59    %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0)
60
    ; S_ADD_U32 defines $scc and S_ADDC_U32 consumes it. Both read only %10,
    ; so neither instruction uses the loaded value %2.
61    %11:sgpr_32 = S_ADD_U32 %10, 4, implicit-def $scc
62    %12:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc
63
    ; Second read: same register operand %1, offset 64.
64    %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64)
65    S_ENDPGM
66
67...
68
69# CHECK-LABEL: name: scc_def_and_use_dependency
70# CHECK: DS_READ2_B32
71# CHECK: S_ADD_U32
72# CHECK: S_ADDC_U32
73---
# Machine function in which the instructions between the two DS_READ_B32
# reads form a chain back to the first read: V_READFIRSTLANE_B32 consumes %2,
# S_ADD_U32 consumes that result and defines $scc, and S_ADDC_U32 consumes
# $scc. The CHECK lines for this function expect DS_READ2_B32 to appear before
# S_ADD_U32 and S_ADDC_U32 in the pass output.
74name:            scc_def_and_use_dependency
75alignment:       0
76exposesReturnsTwice: false
77legalized:       false
78regBankSelected: false
79selected:        false
80tracksRegLiveness: false
81liveins:
82  - { reg: '$vgpr0' }
83  - { reg: '$sgpr0' }
84frameInfo:
85  isFrameAddressTaken: false
86  isReturnAddressTaken: false
87  hasStackMap:     false
88  hasPatchPoint:   false
89  stackSize:       0
90  offsetAdjustment: 0
91  maxAlignment:    0
92  adjustsStack:    false
93  hasCalls:        false
94  maxCallFrameSize: 0
95  hasOpaqueSPAdjustment: false
96  hasVAStart:      false
97  hasMustTailInVarArgFunc: false
98body:             |
99  bb.0:
100    liveins: $vgpr0, $sgpr0
101
    ; %1 is the register operand shared by both DS reads; %10 feeds the
    ; S_ADDC_U32.
102    %1:vgpr_32 = COPY $vgpr0
103    %10:sgpr_32 = COPY $sgpr0
104
    ; Both DS reads carry an implicit use of $m0.
105    $m0 = S_MOV_B32 -1
    ; First read: offset 0. Its result %2 is consumed by the next instruction.
106    %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0)
107    %20:sgpr_32 = V_READFIRSTLANE_B32 %2, implicit $exec
108
    ; Uses %20 (derived from the first read) and defines $scc.
109    %21:sgpr_32 = S_ADD_U32 %20, 4, implicit-def $scc
110    ; The S_ADDC_U32 depends on the first DS_READ_B32 only via SCC
111    %11:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc
112
    ; Second read: same register operand %1, offset 64.
113    %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64)
114    S_ENDPGM
115
116...
117