# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s

# Check that SILoadStoreOptimizer honors physregs defs/uses between moved
# instructions.
#
# The following IR snippet would usually be optimized by the peephole optimizer.
# However, an equivalent situation can occur with buffer instructions as well.
#
# Both functions below have the same shape: two DS_READ_B32 from the same base
# register (%1) at offsets 0 and 64, separated by an S_ADD_U32 / S_ADDC_U32
# pair that communicates through the physical register $scc. The expected
# output contains one DS_READ2_B32 per function; the functions differ only in
# whether the scalar pair depends on the result of the first read, and
# therefore on which side of the DS_READ2_B32 the pair is expected to appear.

# No dependency: neither scalar add reads %2 or %3, so the pair is expected to
# stay ahead of the DS_READ2_B32.
# CHECK-LABEL: name: scc_def_and_use_no_dependency
# CHECK: S_ADD_U32
# CHECK: S_ADDC_U32
# CHECK: DS_READ2_B32
--- |
  ; The IR functions only provide the %ir.ptr.0 and %ir.ptr.64 values named by
  ; the memory operands in the MIR bodies. %ptr.4 is not referenced by any
  ; memory operand in this file.
  define amdgpu_kernel void @scc_def_and_use_no_dependency(i32 addrspace(3)* %ptr.0) nounwind {
    %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
    %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
    ret void
  }

  define amdgpu_kernel void @scc_def_and_use_dependency(i32 addrspace(3)* %ptr.0) nounwind {
    %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
    %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
    ret void
  }
...
---
name: scc_def_and_use_no_dependency
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: false
liveins:
  - { reg: '$vgpr0' }
  - { reg: '$sgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0

    ; %1 is the shared address operand of both DS reads; %10 feeds the scalar adds.
    %1:vgpr_32 = COPY $vgpr0
    %10:sgpr_32 = COPY $sgpr0

    ; Both DS reads list $m0 as an implicit use.
    $m0 = S_MOV_B32 -1
    %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0)

    ; S_ADD_U32 defines $scc and S_ADDC_U32 uses it. Both take only %10 and an
    ; immediate as explicit inputs, so neither depends on %2.
    %11:sgpr_32 = S_ADD_U32 %10, 4, implicit-def $scc
    %12:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc

    %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64)
    S_ENDPGM

...

# Dependency: the S_ADD_U32 reads a value derived from the first read (%2 via
# %20), and the S_ADDC_U32 is tied to that S_ADD_U32 only through $scc. Both
# are expected to appear after the DS_READ2_B32.
# CHECK-LABEL: name: scc_def_and_use_dependency
# CHECK: DS_READ2_B32
# CHECK: S_ADD_U32
# CHECK: S_ADDC_U32
---
name: scc_def_and_use_dependency
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: false
liveins:
  - { reg: '$vgpr0' }
  - { reg: '$sgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0

    ; %1 is the shared address operand of both DS reads; %10 feeds the S_ADDC_U32.
    %1:vgpr_32 = COPY $vgpr0
    %10:sgpr_32 = COPY $sgpr0

    ; Both DS reads list $m0 as an implicit use.
    $m0 = S_MOV_B32 -1
    %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0)
    ; %20 is computed from %2, the result of the first read.
    %20:sgpr_32 = V_READFIRSTLANE_B32 %2, implicit $exec

    ; This S_ADD_U32 reads %20, so it depends on the first read through
    ; virtual registers, and it defines $scc.
    %21:sgpr_32 = S_ADD_U32 %20, 4, implicit-def $scc
    ; The S_ADDC_U32 depends on the first DS_READ_B32 only via SCC
    %11:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc

    %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64)
    S_ENDPGM

...