# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s

# The purpose of this test is to make sure we are combining relevant memory
# operations correctly with/without DLC bit.

# The four IR functions below are deliberately identical: each one backs a MIR
# function further down that exercises a different DLC-bit combination on the
# two adjacent buffer stores (0/0, 1/0, 0/1, 1/1).

--- |
  define amdgpu_kernel void @test1(i32 addrspace(1)* %out) {
    %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
    store i32 123, i32 addrspace(1)* %out.gep.1
    store i32 456, i32 addrspace(1)* %out
    ret void
  }

  define amdgpu_kernel void @test2(i32 addrspace(1)* %out) {
    %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
    store i32 123, i32 addrspace(1)* %out.gep.1
    store i32 456, i32 addrspace(1)* %out
    ret void
  }

  define amdgpu_kernel void @test3(i32 addrspace(1)* %out) {
    %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
    store i32 123, i32 addrspace(1)* %out.gep.1
    store i32 456, i32 addrspace(1)* %out
    ret void
  }

  define amdgpu_kernel void @test4(i32 addrspace(1)* %out) {
    %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
    store i32 123, i32 addrspace(1)* %out.gep.1
    store i32 456, i32 addrspace(1)* %out
    ret void
  }
...
# test1: both stores have DLC=0, so the load-store optimizer may merge them
# into a single BUFFER_STORE_DWORDX2_OFFSET.
# NOTE: the S_LOAD_DWORDX2_IMM previously read its own result register (%1),
# which is invalid SSA (use before def) and left %0 dead; the kernarg pointer
# it must dereference is %0.
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
---
name: test1
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %2, %3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...
# test2: the first store has DLC=1 and the second DLC=0; mismatched DLC bits
# must prevent merging, so both single-dword stores survive unchanged.
# NOTE: fixed the S_LOAD_DWORDX2_IMM source operand from the self-referential
# %1 (invalid SSA, %0 left dead) to the kernarg pointer %0.
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
---
name: test2
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %2, %3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...
# test3: mirror of test2 — the first store has DLC=0 and the second DLC=1;
# mismatched DLC bits must prevent merging regardless of order.
# NOTE: fixed the S_LOAD_DWORDX2_IMM source operand from the self-referential
# %1 (invalid SSA, %0 left dead) to the kernarg pointer %0.
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
---
name: test3
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %2, %3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...
# test4: both stores have DLC=1, so they may be merged — and the combined
# BUFFER_STORE_DWORDX2_OFFSET must keep DLC=1.
# NOTE: fixed the S_LOAD_DWORDX2_IMM source operand from the self-referential
# %1 (invalid SSA, %0 left dead) to the kernarg pointer %0.
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
---
name: test4
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %2, %3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...