1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s 4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s 5 6; FIXME: Merge with regbankselect, which mostly overlaps when all types supported. 7 8; Natural mapping 9define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 10 ; GFX6-LABEL: name: s_buffer_load_i32 11 ; GFX6: bb.1 (%ir-block.0): 12 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 13 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 14 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 15 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 16 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 17 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 18 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 19 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4) 20 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 21 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 22 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 23 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 24 ; GFX7-LABEL: name: s_buffer_load_i32 25 ; GFX7: bb.1 (%ir-block.0): 26 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 27 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 28 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr3 29 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 30 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 31 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 32 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 33 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4) 34 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 35 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 36 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 37 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 38 ; GFX8-LABEL: name: s_buffer_load_i32 39 ; GFX8: bb.1 (%ir-block.0): 40 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 41 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 42 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 43 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 44 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 45 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 46 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 47 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4) 48 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 49 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 50 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 51 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 52 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 53 ret i32 %val 54} 55 56define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 57 ; GFX6-LABEL: name: s_buffer_load_i32_glc 58 ; GFX6: bb.1 
(%ir-block.0): 59 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 60 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 61 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 62 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 63 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 64 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 65 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 66 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4) 67 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 68 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 69 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 70 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 71 ; GFX7-LABEL: name: s_buffer_load_i32_glc 72 ; GFX7: bb.1 (%ir-block.0): 73 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 74 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 75 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 76 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 77 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 78 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 79 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 80 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4) 81 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 82 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 83 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 84 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 85 ; GFX8-LABEL: name: s_buffer_load_i32_glc 86 ; GFX8: bb.1 (%ir-block.0): 87 ; GFX8: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 88 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 89 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 90 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 91 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 92 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 93 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 94 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4) 95 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 96 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 97 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 98 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 99 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1) 100 ret i32 %val 101} 102 103define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 104 ; GFX6-LABEL: name: s_buffer_load_v2i32 105 ; GFX6: bb.1 (%ir-block.0): 106 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 107 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 108 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 109 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 110 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 111 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 112 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 113 ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4) 114 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 115 ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 116 ; GFX6: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 117 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 118 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 119 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 120 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec 121 ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 122 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 123 ; GFX7-LABEL: name: s_buffer_load_v2i32 124 ; GFX7: bb.1 (%ir-block.0): 125 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 126 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 127 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 128 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 129 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 130 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 131 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 132 ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4) 133 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 134 ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 135 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 136 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 137 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 138 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 139 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec 140 ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 141 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 142 ; GFX8-LABEL: name: s_buffer_load_v2i32 143 ; GFX8: bb.1 (%ir-block.0): 144 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 145 ; GFX8: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 146 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 147 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 148 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 149 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 150 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 151 ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4) 152 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 153 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 154 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 155 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 156 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 157 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 158 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec 159 ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 160 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 161 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 162 ret <2 x i32> %val 163} 164 165define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 166 ; GFX6-LABEL: name: s_buffer_load_v3i32 167 ; GFX6: bb.1 (%ir-block.0): 168 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 169 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 170 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 171 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 172 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 173 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 174 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 175 ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) 176 ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 177 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 178 ; GFX6: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] 179 ; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 180 ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 181 ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 182 ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 183 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 184 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec 185 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 186 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 187 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec 188 ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 189 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 190 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec 191 ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] 192 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 193 ; GFX7-LABEL: name: s_buffer_load_v3i32 194 ; GFX7: bb.1 (%ir-block.0): 195 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 196 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 197 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 198 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 199 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 200 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 201 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 202 ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) 203 ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 204 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 205 ; GFX7: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] 206 ; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 207 ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 208 ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 209 ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 210 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 211 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec 212 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 213 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 214 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec 215 ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 216 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 217 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec 218 ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] 219 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 220 ; GFX8-LABEL: name: s_buffer_load_v3i32 221 ; GFX8: bb.1 (%ir-block.0): 222 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 223 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 224 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 225 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 226 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 227 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 228 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], 
%subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 229 ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) 230 ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 231 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 232 ; GFX8: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] 233 ; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 234 ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 235 ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 236 ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 237 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 238 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec 239 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 240 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 241 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec 242 ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 243 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 244 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec 245 ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] 246 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 247 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 248 ret <3 x i32> %val 249} 250 251define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 252 ; GFX6-LABEL: name: s_buffer_load_v8i32 253 ; GFX6: bb.1 (%ir-block.0): 254 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 255 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 256 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 
257 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 258 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 259 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 260 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 261 ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4) 262 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 263 ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 264 ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 265 ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 266 ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 267 ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 268 ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 269 ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 270 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 271 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec 272 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 273 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 274 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec 275 ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 276 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 277 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec 278 ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] 279 ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 280 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec 281 ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] 282 ; GFX6: 
[[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 283 ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec 284 ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] 285 ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] 286 ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec 287 ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] 288 ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] 289 ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec 290 ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] 291 ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] 292 ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec 293 ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] 294 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 295 ; GFX7-LABEL: name: s_buffer_load_v8i32 296 ; GFX7: bb.1 (%ir-block.0): 297 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 298 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 299 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 300 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 301 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 302 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 303 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 304 ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4) 305 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 306 ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 307 ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 308 ; GFX7: 
[[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 309 ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 310 ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 311 ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 312 ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 313 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 314 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec 315 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 316 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 317 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec 318 ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 319 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 320 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec 321 ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] 322 ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 323 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec 324 ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] 325 ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 326 ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec 327 ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] 328 ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] 329 ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec 330 ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] 331 ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] 332 ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec 333 ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] 334 ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] 335 ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY20]], implicit $exec 336 ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] 337 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 338 ; GFX8-LABEL: name: s_buffer_load_v8i32 339 ; GFX8: bb.1 (%ir-block.0): 340 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 341 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 342 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 343 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 344 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 345 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 346 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 347 ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4) 348 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 349 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 350 ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 351 ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 352 ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 353 ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 354 ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 355 ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 356 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 357 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec 358 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 359 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 360 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], 
implicit $exec 361 ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 362 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 363 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec 364 ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] 365 ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 366 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec 367 ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] 368 ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 369 ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec 370 ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] 371 ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] 372 ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec 373 ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] 374 ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] 375 ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec 376 ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] 377 ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] 378 ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec 379 ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] 380 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 381 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 382 ret <8 x i32> %val 383} 384 385define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 386 ; GFX6-LABEL: name: s_buffer_load_v16i32 387 ; GFX6: bb.1 (%ir-block.0): 388 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 389 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 390 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 391 ; GFX6: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 392 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 393 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 394 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 395 ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4) 396 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 397 ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 398 ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 399 ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 400 ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 401 ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 402 ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 403 ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 404 ; GFX6: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 405 ; GFX6: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 406 ; GFX6: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 407 ; GFX6: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 408 ; GFX6: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 409 ; GFX6: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 410 ; GFX6: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 411 ; GFX6: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 412 ; GFX6: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 413 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec 414 ; GFX6: $sgpr0 = COPY 
[[V_READFIRSTLANE_B32_]] 415 ; GFX6: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 416 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec 417 ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 418 ; GFX6: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 419 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec 420 ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] 421 ; GFX6: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 422 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec 423 ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] 424 ; GFX6: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 425 ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec 426 ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] 427 ; GFX6: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] 428 ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec 429 ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] 430 ; GFX6: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] 431 ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec 432 ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] 433 ; GFX6: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] 434 ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec 435 ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] 436 ; GFX6: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] 437 ; GFX6: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec 438 ; GFX6: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] 439 ; GFX6: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] 440 ; GFX6: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec 441 ; GFX6: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] 442 ; GFX6: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] 443 ; GFX6: 
[[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec 444 ; GFX6: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] 445 ; GFX6: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] 446 ; GFX6: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec 447 ; GFX6: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] 448 ; GFX6: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] 449 ; GFX6: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec 450 ; GFX6: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] 451 ; GFX6: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] 452 ; GFX6: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec 453 ; GFX6: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] 454 ; GFX6: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] 455 ; GFX6: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec 456 ; GFX6: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] 457 ; GFX6: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] 458 ; GFX6: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec 459 ; GFX6: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] 460 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 461 ; GFX7-LABEL: name: s_buffer_load_v16i32 462 ; GFX7: bb.1 (%ir-block.0): 463 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 464 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 465 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 466 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 467 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 468 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 469 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 470 ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4) 471 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 472 ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 473 ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 474 ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 475 ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 476 ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 477 ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 478 ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 479 ; GFX7: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 480 ; GFX7: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 481 ; GFX7: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 482 ; GFX7: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 483 ; GFX7: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 484 ; GFX7: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 485 ; GFX7: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 486 ; GFX7: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 487 ; GFX7: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 488 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec 489 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 490 ; GFX7: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 491 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec 492 ; GFX7: $sgpr1 = COPY 
[[V_READFIRSTLANE_B32_1]] 493 ; GFX7: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 494 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec 495 ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] 496 ; GFX7: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 497 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec 498 ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] 499 ; GFX7: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 500 ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec 501 ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] 502 ; GFX7: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] 503 ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec 504 ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] 505 ; GFX7: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] 506 ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec 507 ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] 508 ; GFX7: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] 509 ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec 510 ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] 511 ; GFX7: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] 512 ; GFX7: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec 513 ; GFX7: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] 514 ; GFX7: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] 515 ; GFX7: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec 516 ; GFX7: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] 517 ; GFX7: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] 518 ; GFX7: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec 519 ; GFX7: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] 520 ; GFX7: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] 521 ; 
GFX7: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec 522 ; GFX7: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] 523 ; GFX7: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] 524 ; GFX7: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec 525 ; GFX7: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] 526 ; GFX7: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] 527 ; GFX7: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec 528 ; GFX7: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] 529 ; GFX7: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] 530 ; GFX7: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec 531 ; GFX7: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] 532 ; GFX7: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] 533 ; GFX7: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec 534 ; GFX7: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] 535 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 536 ; GFX8-LABEL: name: s_buffer_load_v16i32 537 ; GFX8: bb.1 (%ir-block.0): 538 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 539 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 540 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 541 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 542 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 543 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 544 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 545 ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 
0 :: (dereferenceable invariant load 64, align 4) 546 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 547 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 548 ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 549 ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 550 ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 551 ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 552 ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 553 ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 554 ; GFX8: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 555 ; GFX8: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 556 ; GFX8: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 557 ; GFX8: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 558 ; GFX8: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 559 ; GFX8: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 560 ; GFX8: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 561 ; GFX8: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 562 ; GFX8: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 563 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec 564 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 565 ; GFX8: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 566 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec 567 ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 568 ; GFX8: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 569 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec 570 ; GFX8: $sgpr2 = COPY 
[[V_READFIRSTLANE_B32_2]] 571 ; GFX8: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 572 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec 573 ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] 574 ; GFX8: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 575 ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec 576 ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] 577 ; GFX8: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] 578 ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec 579 ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] 580 ; GFX8: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] 581 ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec 582 ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] 583 ; GFX8: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] 584 ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec 585 ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] 586 ; GFX8: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] 587 ; GFX8: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec 588 ; GFX8: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] 589 ; GFX8: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] 590 ; GFX8: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec 591 ; GFX8: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] 592 ; GFX8: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] 593 ; GFX8: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec 594 ; GFX8: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] 595 ; GFX8: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] 596 ; GFX8: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec 597 ; GFX8: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] 598 ; GFX8: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] 599 
; GFX8: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec 600 ; GFX8: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] 601 ; GFX8: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] 602 ; GFX8: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec 603 ; GFX8: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] 604 ; GFX8: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] 605 ; GFX8: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec 606 ; GFX8: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] 607 ; GFX8: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] 608 ; GFX8: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec 609 ; GFX8: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] 610 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 611 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 612 ret <16 x i32> %val 613} 614 615define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { 616 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1 617 ; GFX6: bb.1 (%ir-block.0): 618 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 619 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 620 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 621 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 622 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 623 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 624 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 625 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], 
[[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 626 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 627 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 628 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 629 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 630 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1 631 ; GFX7: bb.1 (%ir-block.0): 632 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 633 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 634 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 635 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 636 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 637 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 638 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 639 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 640 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 641 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 642 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 643 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 644 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1 645 ; GFX8: bb.1 (%ir-block.0): 646 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 647 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 648 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 649 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 650 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 651 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 652 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0, 0 :: (dereferenceable invariant load 4) 653 ; 
GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 654 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 655 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 656 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 657 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0) 658 ret i32 %val 659} 660 661define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { 662 ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4 663 ; GFX6: bb.1 (%ir-block.0): 664 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 665 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 666 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 667 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 668 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 669 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 670 ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4) 671 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 672 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 673 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 674 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 675 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4 676 ; GFX7: bb.1 (%ir-block.0): 677 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 678 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 679 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 680 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 681 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 682 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 683 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM 
[[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4) 684 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 685 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 686 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 687 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 688 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4 689 ; GFX8: bb.1 (%ir-block.0): 690 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 691 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 692 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 693 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 694 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 695 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 696 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1, 0 :: (dereferenceable invariant load 4) 697 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 698 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 699 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 700 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 701 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1) 702 ret i32 %val 703} 704 705define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { 706 ; GFX6-LABEL: name: s_buffer_load_i32_offset_255 707 ; GFX6: bb.1 (%ir-block.0): 708 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 709 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 710 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 711 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 712 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 713 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 714 ; GFX6: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 715 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 716 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 717 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 718 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 719 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 720 ; GFX7-LABEL: name: s_buffer_load_i32_offset_255 721 ; GFX7: bb.1 (%ir-block.0): 722 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 723 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 724 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 725 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 726 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 727 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 728 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 729 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 730 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 731 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 732 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 733 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 734 ; GFX8-LABEL: name: s_buffer_load_i32_offset_255 735 ; GFX8: bb.1 (%ir-block.0): 736 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 737 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 738 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 739 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 740 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 741 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 742 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4) 743 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 744 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 745 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 746 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 747 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0) 748 ret i32 %val 749} 750 751define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { 752 ; GFX6-LABEL: name: s_buffer_load_i32_offset_256 753 ; GFX6: bb.1 (%ir-block.0): 754 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 755 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 756 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 757 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 758 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 759 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 760 ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4) 761 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 762 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 763 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 764 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 765 ; GFX7-LABEL: name: s_buffer_load_i32_offset_256 766 ; GFX7: bb.1 (%ir-block.0): 767 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 768 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 769 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 770 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 771 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 772 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 773 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4) 774 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 775 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 776 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 777 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 778 ; GFX8-LABEL: name: s_buffer_load_i32_offset_256 779 ; GFX8: bb.1 (%ir-block.0): 780 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 781 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 782 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 783 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 784 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 785 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 786 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4) 787 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 788 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 789 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 790 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 791 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0) 792 ret i32 %val 793} 794 795define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { 796 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020 797 ; GFX6: bb.1 (%ir-block.0): 798 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 799 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 800 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 801 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 802 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 803 ; GFX6: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 804 ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4) 805 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 806 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 807 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 808 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 809 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020 810 ; GFX7: bb.1 (%ir-block.0): 811 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 812 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 813 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 814 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 815 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 816 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 817 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4) 818 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 819 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 820 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 821 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 822 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020 823 ; GFX8: bb.1 (%ir-block.0): 824 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 825 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 826 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 827 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 828 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 829 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 830 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0, 0 :: (dereferenceable invariant load 4) 831 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 832 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 833 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 834 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 835 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0) 836 ret i32 %val 837} 838 839define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { 840 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023 841 ; GFX6: bb.1 (%ir-block.0): 842 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 843 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 844 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 845 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 846 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 847 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 848 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 849 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 850 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 851 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 852 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 853 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 854 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023 855 ; GFX7: bb.1 (%ir-block.0): 856 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 857 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 858 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 859 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 860 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY 
$sgpr5 861 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 862 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 863 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 864 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 865 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 866 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 867 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 868 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023 869 ; GFX8: bb.1 (%ir-block.0): 870 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 871 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 872 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 873 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 874 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 875 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 876 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0, 0 :: (dereferenceable invariant load 4) 877 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 878 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 879 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 880 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 881 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0) 882 ret i32 %val 883} 884 885define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { 886 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024 887 ; GFX6: bb.1 (%ir-block.0): 888 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 889 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 890 ; GFX6: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 891 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 892 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 893 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 894 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 895 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 896 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 897 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 898 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 899 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 900 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024 901 ; GFX7: bb.1 (%ir-block.0): 902 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 903 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 904 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 905 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 906 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 907 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 908 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4) 909 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 910 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 911 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 912 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 913 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024 914 ; GFX8: bb.1 (%ir-block.0): 915 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 916 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 917 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 918 ; 
GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 919 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 920 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 921 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0, 0 :: (dereferenceable invariant load 4) 922 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 923 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 924 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 925 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 926 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0) 927 ret i32 %val 928} 929 930define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { 931 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025 932 ; GFX6: bb.1 (%ir-block.0): 933 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 934 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 935 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 936 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 937 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 938 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 939 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 940 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 941 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 942 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 943 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 944 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 945 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025 946 ; GFX7: bb.1 (%ir-block.0): 947 ; GFX7: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5 948 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 949 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 950 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 951 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 952 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 953 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 954 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 955 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 956 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 957 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 958 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 959 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025 960 ; GFX8: bb.1 (%ir-block.0): 961 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 962 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 963 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 964 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 965 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 966 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 967 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0, 0 :: (dereferenceable invariant load 4) 968 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 969 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 970 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 971 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 972 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0) 973 ret i32 %val 974} 975 976define amdgpu_ps i32 
@s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { 977 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1 978 ; GFX6: bb.1 (%ir-block.0): 979 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 980 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 981 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 982 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 983 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 984 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 985 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 986 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 987 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 988 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 989 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 990 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 991 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1 992 ; GFX7: bb.1 (%ir-block.0): 993 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 994 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 995 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 996 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 997 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 998 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 999 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 1000 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1001 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1002 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1003 ; GFX7: $sgpr0 = 
COPY [[V_READFIRSTLANE_B32_]] 1004 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1005 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1 1006 ; GFX8: bb.1 (%ir-block.0): 1007 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1008 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1009 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1010 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1011 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1012 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1013 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 1014 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1015 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1016 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1017 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1018 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1019 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0) 1020 ret i32 %load 1021} 1022 1023define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { 1024 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4 1025 ; GFX6: bb.1 (%ir-block.0): 1026 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1027 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1028 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1029 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1030 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1031 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1032 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 1033 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 
0 :: (dereferenceable invariant load 4) 1034 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1035 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1036 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1037 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1038 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4 1039 ; GFX7: bb.1 (%ir-block.0): 1040 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1041 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1042 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1043 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1044 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1045 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1046 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0, 0 :: (dereferenceable invariant load 4) 1047 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1048 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1049 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1050 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1051 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4 1052 ; GFX8: bb.1 (%ir-block.0): 1053 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1054 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1055 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1056 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1057 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1058 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1059 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 1060 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: 
(dereferenceable invariant load 4) 1061 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1062 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1063 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1064 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1065 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0) 1066 ret i32 %load 1067} 1068 1069define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { 1070 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8 1071 ; GFX6: bb.1 (%ir-block.0): 1072 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1073 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1074 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1075 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1076 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1077 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1078 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 1079 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1080 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1081 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1082 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1083 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1084 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8 1085 ; GFX7: bb.1 (%ir-block.0): 1086 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1087 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1088 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1089 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1090 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1091 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1092 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0, 0 :: (dereferenceable invariant load 4) 1093 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1094 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1095 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1096 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1097 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8 1098 ; GFX8: bb.1 (%ir-block.0): 1099 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1100 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1101 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1102 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1103 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1104 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1105 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 1106 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1107 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1108 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1109 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1110 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1111 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) 1112 ret i32 %load 1113} 1114 1115define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { 1116 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31 1117 ; GFX6: bb.1 (%ir-block.0): 1118 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1119 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1120 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1121 ; GFX6: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1122 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1123 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1124 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 1125 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1126 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1127 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1128 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1129 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1130 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31 1131 ; GFX7: bb.1 (%ir-block.0): 1132 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1133 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1134 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1135 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1136 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1137 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1138 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0, 0 :: (dereferenceable invariant load 4) 1139 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1140 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1141 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1142 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1143 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31 1144 ; GFX8: bb.1 (%ir-block.0): 1145 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1146 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1147 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1148 ; GFX8: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1149 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1150 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1151 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 1152 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1153 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1154 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1155 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1156 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1157 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0) 1158 ret i32 %load 1159} 1160 1161define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) { 1162 ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30 1163 ; GFX6: bb.1 (%ir-block.0): 1164 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1165 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1166 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1167 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1168 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1169 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1170 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 1171 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4) 1172 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1173 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1174 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1175 ; GFX6: SI_RETURN_TO_EPILOG 
implicit $sgpr0 1176 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30 1177 ; GFX7: bb.1 (%ir-block.0): 1178 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1179 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1180 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1181 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1182 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1183 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1184 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1, 0 :: (dereferenceable invariant load 4) 1185 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1186 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1187 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1188 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1189 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30 1190 ; GFX8: bb.1 (%ir-block.0): 1191 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1192 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1193 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1194 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1195 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1196 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1197 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 1198 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4) 1199 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1200 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1201 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1202 ; GFX8: 
SI_RETURN_TO_EPILOG implicit $sgpr0 1203 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1) 1204 ret i32 %load 1205} 1206 1207define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { 1208 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29 1209 ; GFX6: bb.1 (%ir-block.0): 1210 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1211 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1212 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1213 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1214 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1215 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1216 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 1217 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1218 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1219 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1220 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1221 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1222 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29 1223 ; GFX7: bb.1 (%ir-block.0): 1224 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1225 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1226 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1227 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1228 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1229 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1230 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0, 0 :: (dereferenceable invariant load 4) 1231 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[S_BUFFER_LOAD_DWORD_IMM_ci]] 1232 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1233 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1234 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1235 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29 1236 ; GFX8: bb.1 (%ir-block.0): 1237 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1238 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1239 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1240 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1241 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1242 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1243 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 1244 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1245 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1246 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1247 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1248 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1249 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) 1250 ret i32 %load 1251} 1252 1253define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { 1254 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21 1255 ; GFX6: bb.1 (%ir-block.0): 1256 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1257 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1258 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1259 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1260 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1261 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1262 ; GFX6: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 1263 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1264 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1265 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1266 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1267 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1268 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21 1269 ; GFX7: bb.1 (%ir-block.0): 1270 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1271 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1272 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1273 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1274 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1275 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1276 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4) 1277 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1278 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1279 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1280 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1281 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21 1282 ; GFX8: bb.1 (%ir-block.0): 1283 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1284 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1285 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1286 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1287 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1288 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1289 ; GFX8: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 1290 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1291 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1292 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1293 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1294 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1295 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) 1296 ret i32 %load 1297} 1298 1299define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { 1300 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20 1301 ; GFX6: bb.1 (%ir-block.0): 1302 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1303 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1304 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1305 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1306 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1307 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1308 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 1309 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1310 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1311 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1312 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1313 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1314 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20 1315 ; GFX7: bb.1 (%ir-block.0): 1316 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1317 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1318 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1319 ; GFX7: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1320 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1321 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1322 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0, 0 :: (dereferenceable invariant load 4) 1323 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1324 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1325 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1326 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1327 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20 1328 ; GFX8: bb.1 (%ir-block.0): 1329 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1330 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1331 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1332 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1333 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1334 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1335 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 1336 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1337 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1338 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1339 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1340 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1341 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) 1342 ret i32 %load 1343} 1344 1345define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) { 1346 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20 1347 
; GFX6: bb.1 (%ir-block.0): 1348 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1349 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1350 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1351 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1352 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1353 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1354 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 1355 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1356 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1357 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1358 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1359 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1360 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20 1361 ; GFX7: bb.1 (%ir-block.0): 1362 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1363 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1364 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1365 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1366 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1367 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1368 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0, 0 :: (dereferenceable invariant load 4) 1369 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1370 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1371 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1372 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1373 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20 
1374 ; GFX8: bb.1 (%ir-block.0): 1375 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1376 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1377 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1378 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1379 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1380 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1381 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 1382 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1383 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1384 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1385 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1386 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1387 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) 1388 ret i32 %load 1389} 1390 1391define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { 1392 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19 1393 ; GFX6: bb.1 (%ir-block.0): 1394 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1395 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1396 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1397 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1398 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1399 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1400 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 1401 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1402 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 
1403 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1404 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1405 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1406 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19 1407 ; GFX7: bb.1 (%ir-block.0): 1408 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1409 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1410 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1411 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1412 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1413 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1414 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0, 0 :: (dereferenceable invariant load 4) 1415 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1416 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1417 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1418 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1419 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19 1420 ; GFX8: bb.1 (%ir-block.0): 1421 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1422 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1423 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1424 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1425 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1426 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1427 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4) 1428 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 1429 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], 
implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
  ret i32 %load
}

; -524288 has bit 19 set but is negative: GFX6/GFX8 cannot encode it as an
; immediate offset, so they materialize it with S_MOV_B32 and select the
; SGPR-offset form of the load. GFX7 still selects S_BUFFER_LOAD_DWORD_IMM_ci.
; NOTE(review): the GFX7 immediate 1073610752 (0x3FFE0000) looks like
; (-524288 / 4) truncated to the encodable field, since the _ci form takes a
; dword-granularity offset -- confirm against the CI ISA documentation.
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
  ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
  ret i32 %load
}

; Check cases that need to be converted to MUBUF due to the offset being a VGPR.
; The offset lives in a VGPR here, so the scalar S_BUFFER_LOAD cannot be used;
; selection converts the load to the MUBUF form (BUFFER_LOAD_DWORD_OFFEN) with
; a zero SGPR soffset materialized by S_MOV_B32.
define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}

define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
  ; GFX6:
[[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 1540 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 1541 ; GFX6: $vgpr0 = COPY [[COPY5]] 1542 ; GFX6: $vgpr1 = COPY [[COPY6]] 1543 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1544 ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset 1545 ; GFX7: bb.1 (%ir-block.0): 1546 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1547 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1548 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1549 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1550 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1551 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1552 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1553 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1554 ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) 1555 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 1556 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 1557 ; GFX7: $vgpr0 = COPY [[COPY5]] 1558 ; GFX7: $vgpr1 = COPY [[COPY6]] 1559 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1560 ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset 1561 ; GFX8: bb.1 (%ir-block.0): 1562 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1563 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1564 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1565 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1566 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1567 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1568 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 1569 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1570 ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) 1571 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 1572 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 1573 ; GFX8: $vgpr0 = COPY [[COPY5]] 1574 ; GFX8: $vgpr1 = COPY [[COPY6]] 1575 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1576 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1577 ret <2 x float> %val 1578} 1579 1580define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1581 ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset 1582 ; GFX6: bb.1 (%ir-block.0): 1583 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1584 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1585 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1586 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1587 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1588 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1589 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1590 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1591 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1592 ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 1593 ; GFX6: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] 1594 ; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] 1595 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, 
[[COPY6]], %subreg.sub8_sub9_sub10_sub11 1596 ; GFX6: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 1597 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 1598 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 1599 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 1600 ; GFX6: $vgpr0 = COPY [[COPY8]] 1601 ; GFX6: $vgpr1 = COPY [[COPY9]] 1602 ; GFX6: $vgpr2 = COPY [[COPY10]] 1603 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 1604 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset 1605 ; GFX7: bb.1 (%ir-block.0): 1606 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1607 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1608 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1609 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1610 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1611 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1612 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1613 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1614 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1615 ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 1616 ; GFX7: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] 1617 ; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] 1618 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 1619 ; GFX7: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 1620 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 1621 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 1622 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 1623 ; GFX7: $vgpr0 = 
COPY [[COPY8]] 1624 ; GFX7: $vgpr1 = COPY [[COPY9]] 1625 ; GFX7: $vgpr2 = COPY [[COPY10]] 1626 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 1627 ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset 1628 ; GFX8: bb.1 (%ir-block.0): 1629 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1630 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1631 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1632 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1633 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1634 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1635 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1636 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1637 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1638 ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 1639 ; GFX8: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] 1640 ; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] 1641 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 1642 ; GFX8: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 1643 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 1644 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 1645 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 1646 ; GFX8: $vgpr0 = COPY [[COPY8]] 1647 ; GFX8: $vgpr1 = COPY [[COPY9]] 1648 ; GFX8: $vgpr2 = COPY [[COPY10]] 1649 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 1650 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1651 ret <3 x float> %val 1652} 1653 1654define 
amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1655 ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset 1656 ; GFX6: bb.1 (%ir-block.0): 1657 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1658 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1659 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1660 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1661 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1662 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1663 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1664 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1665 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1666 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 1667 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 1668 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 1669 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 1670 ; GFX6: $vgpr0 = COPY [[COPY5]] 1671 ; GFX6: $vgpr1 = COPY [[COPY6]] 1672 ; GFX6: $vgpr2 = COPY [[COPY7]] 1673 ; GFX6: $vgpr3 = COPY [[COPY8]] 1674 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 1675 ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset 1676 ; GFX7: bb.1 (%ir-block.0): 1677 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1678 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1679 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1680 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1681 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1682 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1683 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1684 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1685 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1686 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 1687 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 1688 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 1689 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 1690 ; GFX7: $vgpr0 = COPY [[COPY5]] 1691 ; GFX7: $vgpr1 = COPY [[COPY6]] 1692 ; GFX7: $vgpr2 = COPY [[COPY7]] 1693 ; GFX7: $vgpr3 = COPY [[COPY8]] 1694 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 1695 ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset 1696 ; GFX8: bb.1 (%ir-block.0): 1697 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1698 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1699 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1700 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1701 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1702 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1703 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1704 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1705 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1706 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 1707 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 1708 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 1709 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 1710 ; GFX8: $vgpr0 = COPY [[COPY5]] 1711 ; GFX8: $vgpr1 = COPY [[COPY6]] 1712 ; GFX8: $vgpr2 = COPY [[COPY7]] 1713 ; GFX8: $vgpr3 = COPY [[COPY8]] 1714 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 1715 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1716 ret <4 x float> %val 1717} 1718 1719define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1720 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset 1721 ; GFX6: bb.1 (%ir-block.0): 1722 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1723 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1724 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1725 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1726 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1727 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1728 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1729 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1730 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1731 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1732 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 1733 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1734 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1735 ; 
GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1736 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1737 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1738 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1739 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1740 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1741 ; GFX6: $vgpr0 = COPY [[COPY5]] 1742 ; GFX6: $vgpr1 = COPY [[COPY6]] 1743 ; GFX6: $vgpr2 = COPY [[COPY7]] 1744 ; GFX6: $vgpr3 = COPY [[COPY8]] 1745 ; GFX6: $vgpr4 = COPY [[COPY9]] 1746 ; GFX6: $vgpr5 = COPY [[COPY10]] 1747 ; GFX6: $vgpr6 = COPY [[COPY11]] 1748 ; GFX6: $vgpr7 = COPY [[COPY12]] 1749 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1750 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset 1751 ; GFX7: bb.1 (%ir-block.0): 1752 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1753 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1754 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1755 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1756 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1757 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1758 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1759 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1760 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1761 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1762 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = 
REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 1763 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1764 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1765 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1766 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1767 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1768 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1769 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1770 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1771 ; GFX7: $vgpr0 = COPY [[COPY5]] 1772 ; GFX7: $vgpr1 = COPY [[COPY6]] 1773 ; GFX7: $vgpr2 = COPY [[COPY7]] 1774 ; GFX7: $vgpr3 = COPY [[COPY8]] 1775 ; GFX7: $vgpr4 = COPY [[COPY9]] 1776 ; GFX7: $vgpr5 = COPY [[COPY10]] 1777 ; GFX7: $vgpr6 = COPY [[COPY11]] 1778 ; GFX7: $vgpr7 = COPY [[COPY12]] 1779 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1780 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset 1781 ; GFX8: bb.1 (%ir-block.0): 1782 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1783 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1784 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1785 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1786 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1787 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1788 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1789 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1790 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 
4) 1791 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1792 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 1793 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1794 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1795 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1796 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1797 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1798 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1799 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1800 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1801 ; GFX8: $vgpr0 = COPY [[COPY5]] 1802 ; GFX8: $vgpr1 = COPY [[COPY6]] 1803 ; GFX8: $vgpr2 = COPY [[COPY7]] 1804 ; GFX8: $vgpr3 = COPY [[COPY8]] 1805 ; GFX8: $vgpr4 = COPY [[COPY9]] 1806 ; GFX8: $vgpr5 = COPY [[COPY10]] 1807 ; GFX8: $vgpr6 = COPY [[COPY11]] 1808 ; GFX8: $vgpr7 = COPY [[COPY12]] 1809 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1810 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1811 ret <8 x float> %val 1812} 1813 1814define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1815 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset 1816 ; GFX6: bb.1 (%ir-block.0): 1817 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1818 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1819 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1820 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1821 ; GFX6: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1822 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1823 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1824 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1825 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1826 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1827 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 1828 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 1829 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 1830 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1831 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1832 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1833 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1834 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1835 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1836 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1837 ; GFX6: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1838 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 1839 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 1840 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 1841 ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 1842 ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 1843 ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 1844 ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 1845 ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 1846 ; GFX6: $vgpr0 = COPY [[COPY5]] 1847 ; GFX6: $vgpr1 = COPY [[COPY6]] 1848 ; GFX6: $vgpr2 = COPY [[COPY7]] 1849 ; GFX6: $vgpr3 = COPY [[COPY8]] 1850 ; GFX6: $vgpr4 = COPY [[COPY9]] 1851 ; GFX6: $vgpr5 = COPY [[COPY10]] 1852 ; GFX6: $vgpr6 = COPY [[COPY11]] 1853 ; GFX6: $vgpr7 = COPY [[COPY12]] 1854 ; GFX6: $vgpr8 = COPY [[COPY13]] 1855 ; GFX6: $vgpr9 = COPY [[COPY14]] 1856 ; GFX6: $vgpr10 = COPY [[COPY15]] 1857 ; GFX6: $vgpr11 = COPY [[COPY16]] 1858 ; GFX6: $vgpr12 = COPY [[COPY17]] 1859 ; GFX6: $vgpr13 = COPY [[COPY18]] 1860 ; GFX6: $vgpr14 = COPY [[COPY19]] 1861 ; GFX6: $vgpr15 = COPY [[COPY20]] 1862 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1863 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset 1864 ; GFX7: bb.1 (%ir-block.0): 1865 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1866 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1867 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1868 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1869 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1870 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1871 ; GFX7: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1872 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1873 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1874 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1875 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 1876 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 1877 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 1878 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1879 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1880 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1881 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1882 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1883 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1884 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1885 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1886 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub8 1887 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 1888 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 1889 ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 1890 ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 1891 ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 1892 ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 1893 ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 1894 ; GFX7: $vgpr0 = COPY [[COPY5]] 1895 ; GFX7: $vgpr1 = COPY [[COPY6]] 1896 ; GFX7: $vgpr2 = COPY [[COPY7]] 1897 ; GFX7: $vgpr3 = COPY [[COPY8]] 1898 ; GFX7: $vgpr4 = COPY [[COPY9]] 1899 ; GFX7: $vgpr5 = COPY [[COPY10]] 1900 ; GFX7: $vgpr6 = COPY [[COPY11]] 1901 ; GFX7: $vgpr7 = COPY [[COPY12]] 1902 ; GFX7: $vgpr8 = COPY [[COPY13]] 1903 ; GFX7: $vgpr9 = COPY [[COPY14]] 1904 ; GFX7: $vgpr10 = COPY [[COPY15]] 1905 ; GFX7: $vgpr11 = COPY [[COPY16]] 1906 ; GFX7: $vgpr12 = COPY [[COPY17]] 1907 ; GFX7: $vgpr13 = COPY [[COPY18]] 1908 ; GFX7: $vgpr14 = COPY [[COPY19]] 1909 ; GFX7: $vgpr15 = COPY [[COPY20]] 1910 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1911 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset 1912 ; GFX8: bb.1 (%ir-block.0): 1913 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1914 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1915 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1916 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1917 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1918 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1919 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 1920 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1921 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1922 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1923 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 1924 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 1925 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 1926 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1927 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1928 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1929 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1930 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1931 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1932 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1933 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1934 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 1935 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 1936 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 
= COPY [[REG_SEQUENCE1]].sub10 1937 ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 1938 ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 1939 ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 1940 ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 1941 ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 1942 ; GFX8: $vgpr0 = COPY [[COPY5]] 1943 ; GFX8: $vgpr1 = COPY [[COPY6]] 1944 ; GFX8: $vgpr2 = COPY [[COPY7]] 1945 ; GFX8: $vgpr3 = COPY [[COPY8]] 1946 ; GFX8: $vgpr4 = COPY [[COPY9]] 1947 ; GFX8: $vgpr5 = COPY [[COPY10]] 1948 ; GFX8: $vgpr6 = COPY [[COPY11]] 1949 ; GFX8: $vgpr7 = COPY [[COPY12]] 1950 ; GFX8: $vgpr8 = COPY [[COPY13]] 1951 ; GFX8: $vgpr9 = COPY [[COPY14]] 1952 ; GFX8: $vgpr10 = COPY [[COPY15]] 1953 ; GFX8: $vgpr11 = COPY [[COPY16]] 1954 ; GFX8: $vgpr12 = COPY [[COPY17]] 1955 ; GFX8: $vgpr13 = COPY [[COPY18]] 1956 ; GFX8: $vgpr14 = COPY [[COPY19]] 1957 ; GFX8: $vgpr15 = COPY [[COPY20]] 1958 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1959 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1960 ret <16 x float> %val 1961} 1962 1963define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1964 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 1965 ; GFX6: bb.1 (%ir-block.0): 1966 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1967 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1968 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1969 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1970 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1971 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1972 ; GFX6: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1973 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1974 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 1975 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1976 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 1977 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 1978 ; GFX7: bb.1 (%ir-block.0): 1979 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1980 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1981 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1982 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1983 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1984 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1985 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1986 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1987 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 1988 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1989 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 1990 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 1991 ; GFX8: bb.1 (%ir-block.0): 1992 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1993 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1994 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1995 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1996 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1997 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1998 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1999 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2000 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2001 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2002 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2003 %soffset = add i32 %soffset.base, 4092 2004 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2005 ret float %val 2006} 2007 2008define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2009 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 2010 ; GFX6: bb.1 (%ir-block.0): 2011 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2012 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2013 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2014 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2015 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2016 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2017 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2018 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2019 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2020 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2021 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2022 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 2023 ; GFX7: bb.1 (%ir-block.0): 2024 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2025 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2026 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2027 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2028 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY 
$sgpr5 2029 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2030 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2031 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2032 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2033 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2034 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2035 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 2036 ; GFX8: bb.1 (%ir-block.0): 2037 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2038 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2039 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2040 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2041 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2042 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2043 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2044 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2045 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2046 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2047 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2048 %soffset = add i32 %soffset.base, 4095 2049 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2050 ret float %val 2051} 2052 2053define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2054 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 2055 ; GFX6: bb.1 (%ir-block.0): 2056 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2057 ; GFX6: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2058 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2059 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2060 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2061 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2062 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2063 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2064 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2065 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2066 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2067 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 2068 ; GFX7: bb.1 (%ir-block.0): 2069 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2070 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2071 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2072 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2073 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2074 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2075 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2076 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2077 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2078 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2079 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2080 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 2081 ; GFX8: bb.1 (%ir-block.0): 2082 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2083 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2084 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2085 ; GFX8: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2086 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2087 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2088 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2089 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 2090 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2091 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2092 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2093 %soffset = add i32 %soffset.base, 4096 2094 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2095 ret float %val 2096} 2097 2098; Make sure the base offset is added to each split load. 2099define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2100 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 2101 ; GFX6: bb.1 (%ir-block.0): 2102 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2103 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2104 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2105 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2106 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2107 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2108 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2109 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2110 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2111 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], 
[[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2112 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2113 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2114 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2115 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2116 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2117 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2118 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2119 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2120 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2121 ; GFX6: $vgpr0 = COPY [[COPY5]] 2122 ; GFX6: $vgpr1 = COPY [[COPY6]] 2123 ; GFX6: $vgpr2 = COPY [[COPY7]] 2124 ; GFX6: $vgpr3 = COPY [[COPY8]] 2125 ; GFX6: $vgpr4 = COPY [[COPY9]] 2126 ; GFX6: $vgpr5 = COPY [[COPY10]] 2127 ; GFX6: $vgpr6 = COPY [[COPY11]] 2128 ; GFX6: $vgpr7 = COPY [[COPY12]] 2129 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2130 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 2131 ; GFX7: bb.1 (%ir-block.0): 2132 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2133 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2134 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2135 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2136 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2137 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2138 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2139 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2140 ; GFX7: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2141 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2142 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2143 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2144 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2145 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2146 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2147 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2148 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2149 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2150 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2151 ; GFX7: $vgpr0 = COPY [[COPY5]] 2152 ; GFX7: $vgpr1 = COPY [[COPY6]] 2153 ; GFX7: $vgpr2 = COPY [[COPY7]] 2154 ; GFX7: $vgpr3 = COPY [[COPY8]] 2155 ; GFX7: $vgpr4 = COPY [[COPY9]] 2156 ; GFX7: $vgpr5 = COPY [[COPY10]] 2157 ; GFX7: $vgpr6 = COPY [[COPY11]] 2158 ; GFX7: $vgpr7 = COPY [[COPY12]] 2159 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2160 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 2161 ; GFX8: bb.1 (%ir-block.0): 2162 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2163 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2164 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2165 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2166 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 
2167 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2168 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2169 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2170 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2171 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2172 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2173 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2174 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2175 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2176 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2177 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2178 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2179 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2180 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2181 ; GFX8: $vgpr0 = COPY [[COPY5]] 2182 ; GFX8: $vgpr1 = COPY [[COPY6]] 2183 ; GFX8: $vgpr2 = COPY [[COPY7]] 2184 ; GFX8: $vgpr3 = COPY [[COPY8]] 2185 ; GFX8: $vgpr4 = COPY [[COPY9]] 2186 ; GFX8: $vgpr5 = COPY [[COPY10]] 2187 ; GFX8: $vgpr6 = COPY [[COPY11]] 2188 ; GFX8: $vgpr7 = COPY [[COPY12]] 2189 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2190 %soffset = add i32 %soffset.base, 4064 2191 %val = call <8 x float> 
@llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2192 ret <8 x float> %val 2193} 2194 2195; Make sure the maximum offset isn't exceeded when splitting this 2196define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2197 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 2198 ; GFX6: bb.1 (%ir-block.0): 2199 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2200 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2201 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2202 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2203 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2204 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2205 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2206 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 2207 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2208 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2209 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2210 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2211 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2212 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2213 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2214 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2215 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2216 ; GFX6: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2217 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2218 ; GFX6: $vgpr0 = COPY [[COPY5]] 2219 ; GFX6: $vgpr1 = COPY [[COPY6]] 2220 ; GFX6: $vgpr2 = COPY [[COPY7]] 2221 ; GFX6: $vgpr3 = COPY [[COPY8]] 2222 ; GFX6: $vgpr4 = COPY [[COPY9]] 2223 ; GFX6: $vgpr5 = COPY [[COPY10]] 2224 ; GFX6: $vgpr6 = COPY [[COPY11]] 2225 ; GFX6: $vgpr7 = COPY [[COPY12]] 2226 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2227 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 2228 ; GFX7: bb.1 (%ir-block.0): 2229 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2230 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2231 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2232 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2233 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2234 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2235 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2236 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 2237 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2238 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2239 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2240 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2241 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2242 ; 
GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2243 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2244 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2245 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2246 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2247 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2248 ; GFX7: $vgpr0 = COPY [[COPY5]] 2249 ; GFX7: $vgpr1 = COPY [[COPY6]] 2250 ; GFX7: $vgpr2 = COPY [[COPY7]] 2251 ; GFX7: $vgpr3 = COPY [[COPY8]] 2252 ; GFX7: $vgpr4 = COPY [[COPY9]] 2253 ; GFX7: $vgpr5 = COPY [[COPY10]] 2254 ; GFX7: $vgpr6 = COPY [[COPY11]] 2255 ; GFX7: $vgpr7 = COPY [[COPY12]] 2256 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2257 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 2258 ; GFX8: bb.1 (%ir-block.0): 2259 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2260 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2261 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2262 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2263 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2264 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2265 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2266 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 2267 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2268 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2269 ; GFX8: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2270 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2271 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2272 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2273 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2274 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2275 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2276 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2277 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2278 ; GFX8: $vgpr0 = COPY [[COPY5]] 2279 ; GFX8: $vgpr1 = COPY [[COPY6]] 2280 ; GFX8: $vgpr2 = COPY [[COPY7]] 2281 ; GFX8: $vgpr3 = COPY [[COPY8]] 2282 ; GFX8: $vgpr4 = COPY [[COPY9]] 2283 ; GFX8: $vgpr5 = COPY [[COPY10]] 2284 ; GFX8: $vgpr6 = COPY [[COPY11]] 2285 ; GFX8: $vgpr7 = COPY [[COPY12]] 2286 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2287 %soffset = add i32 %soffset.base, 4068 2288 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2289 ret <8 x float> %val 2290} 2291 2292define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2293 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 2294 ; GFX6: bb.1 (%ir-block.0): 2295 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2296 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2297 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2298 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2299 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2300 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2301 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2302 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2303 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2304 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2305 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2306 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2307 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2308 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2309 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2310 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2311 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2312 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2313 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2314 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2315 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2316 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2317 ; GFX6: 
[[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2318 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2319 ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2320 ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2321 ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2322 ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2323 ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2324 ; GFX6: $vgpr0 = COPY [[COPY5]] 2325 ; GFX6: $vgpr1 = COPY [[COPY6]] 2326 ; GFX6: $vgpr2 = COPY [[COPY7]] 2327 ; GFX6: $vgpr3 = COPY [[COPY8]] 2328 ; GFX6: $vgpr4 = COPY [[COPY9]] 2329 ; GFX6: $vgpr5 = COPY [[COPY10]] 2330 ; GFX6: $vgpr6 = COPY [[COPY11]] 2331 ; GFX6: $vgpr7 = COPY [[COPY12]] 2332 ; GFX6: $vgpr8 = COPY [[COPY13]] 2333 ; GFX6: $vgpr9 = COPY [[COPY14]] 2334 ; GFX6: $vgpr10 = COPY [[COPY15]] 2335 ; GFX6: $vgpr11 = COPY [[COPY16]] 2336 ; GFX6: $vgpr12 = COPY [[COPY17]] 2337 ; GFX6: $vgpr13 = COPY [[COPY18]] 2338 ; GFX6: $vgpr14 = COPY [[COPY19]] 2339 ; GFX6: $vgpr15 = COPY [[COPY20]] 2340 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2341 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 2342 ; GFX7: bb.1 (%ir-block.0): 2343 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2344 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2345 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2346 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2347 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2348 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2349 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2350 ; GFX7: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2351 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2352 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2353 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2354 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2355 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2356 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2357 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2358 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2359 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2360 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2361 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2362 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2363 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2364 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2365 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2366 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub10 2367 ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2368 ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2369 ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2370 ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2371 ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2372 ; GFX7: $vgpr0 = COPY [[COPY5]] 2373 ; GFX7: $vgpr1 = COPY [[COPY6]] 2374 ; GFX7: $vgpr2 = COPY [[COPY7]] 2375 ; GFX7: $vgpr3 = COPY [[COPY8]] 2376 ; GFX7: $vgpr4 = COPY [[COPY9]] 2377 ; GFX7: $vgpr5 = COPY [[COPY10]] 2378 ; GFX7: $vgpr6 = COPY [[COPY11]] 2379 ; GFX7: $vgpr7 = COPY [[COPY12]] 2380 ; GFX7: $vgpr8 = COPY [[COPY13]] 2381 ; GFX7: $vgpr9 = COPY [[COPY14]] 2382 ; GFX7: $vgpr10 = COPY [[COPY15]] 2383 ; GFX7: $vgpr11 = COPY [[COPY16]] 2384 ; GFX7: $vgpr12 = COPY [[COPY17]] 2385 ; GFX7: $vgpr13 = COPY [[COPY18]] 2386 ; GFX7: $vgpr14 = COPY [[COPY19]] 2387 ; GFX7: $vgpr15 = COPY [[COPY20]] 2388 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2389 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 2390 ; GFX8: bb.1 (%ir-block.0): 2391 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2392 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2393 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2394 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2395 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2396 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2397 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2398 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2399 ; GFX8: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2400 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2401 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2402 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2403 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2404 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2405 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2406 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2407 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2408 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2409 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2410 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2411 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2412 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2413 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2414 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2415 ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub11 2416 ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2417 ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2418 ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2419 ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2420 ; GFX8: $vgpr0 = COPY [[COPY5]] 2421 ; GFX8: $vgpr1 = COPY [[COPY6]] 2422 ; GFX8: $vgpr2 = COPY [[COPY7]] 2423 ; GFX8: $vgpr3 = COPY [[COPY8]] 2424 ; GFX8: $vgpr4 = COPY [[COPY9]] 2425 ; GFX8: $vgpr5 = COPY [[COPY10]] 2426 ; GFX8: $vgpr6 = COPY [[COPY11]] 2427 ; GFX8: $vgpr7 = COPY [[COPY12]] 2428 ; GFX8: $vgpr8 = COPY [[COPY13]] 2429 ; GFX8: $vgpr9 = COPY [[COPY14]] 2430 ; GFX8: $vgpr10 = COPY [[COPY15]] 2431 ; GFX8: $vgpr11 = COPY [[COPY16]] 2432 ; GFX8: $vgpr12 = COPY [[COPY17]] 2433 ; GFX8: $vgpr13 = COPY [[COPY18]] 2434 ; GFX8: $vgpr14 = COPY [[COPY19]] 2435 ; GFX8: $vgpr15 = COPY [[COPY20]] 2436 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2437 %soffset = add i32 %soffset.base, 4032 2438 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2439 ret <16 x float> %val 2440} 2441 2442define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2443 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 2444 ; GFX6: bb.1 (%ir-block.0): 2445 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2446 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2447 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2448 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2449 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2450 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2451 ; GFX6: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2452 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 2453 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2454 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2455 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2456 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2457 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2458 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2459 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2460 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2461 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2462 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2463 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2464 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2465 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2466 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub8 2467 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2468 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2469 ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2470 ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2471 ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2472 ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2473 ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2474 ; GFX6: $vgpr0 = COPY [[COPY5]] 2475 ; GFX6: $vgpr1 = COPY [[COPY6]] 2476 ; GFX6: $vgpr2 = COPY [[COPY7]] 2477 ; GFX6: $vgpr3 = COPY [[COPY8]] 2478 ; GFX6: $vgpr4 = COPY [[COPY9]] 2479 ; GFX6: $vgpr5 = COPY [[COPY10]] 2480 ; GFX6: $vgpr6 = COPY [[COPY11]] 2481 ; GFX6: $vgpr7 = COPY [[COPY12]] 2482 ; GFX6: $vgpr8 = COPY [[COPY13]] 2483 ; GFX6: $vgpr9 = COPY [[COPY14]] 2484 ; GFX6: $vgpr10 = COPY [[COPY15]] 2485 ; GFX6: $vgpr11 = COPY [[COPY16]] 2486 ; GFX6: $vgpr12 = COPY [[COPY17]] 2487 ; GFX6: $vgpr13 = COPY [[COPY18]] 2488 ; GFX6: $vgpr14 = COPY [[COPY19]] 2489 ; GFX6: $vgpr15 = COPY [[COPY20]] 2490 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2491 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 2492 ; GFX7: bb.1 (%ir-block.0): 2493 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2494 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2495 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2496 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2497 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2498 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2499 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 2500 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 2501 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2502 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2503 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2504 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2505 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2506 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2507 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2508 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2509 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2510 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2511 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2512 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2513 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2514 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2515 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2516 ; GFX7: 
[[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2517 ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2518 ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2519 ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2520 ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2521 ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2522 ; GFX7: $vgpr0 = COPY [[COPY5]] 2523 ; GFX7: $vgpr1 = COPY [[COPY6]] 2524 ; GFX7: $vgpr2 = COPY [[COPY7]] 2525 ; GFX7: $vgpr3 = COPY [[COPY8]] 2526 ; GFX7: $vgpr4 = COPY [[COPY9]] 2527 ; GFX7: $vgpr5 = COPY [[COPY10]] 2528 ; GFX7: $vgpr6 = COPY [[COPY11]] 2529 ; GFX7: $vgpr7 = COPY [[COPY12]] 2530 ; GFX7: $vgpr8 = COPY [[COPY13]] 2531 ; GFX7: $vgpr9 = COPY [[COPY14]] 2532 ; GFX7: $vgpr10 = COPY [[COPY15]] 2533 ; GFX7: $vgpr11 = COPY [[COPY16]] 2534 ; GFX7: $vgpr12 = COPY [[COPY17]] 2535 ; GFX7: $vgpr13 = COPY [[COPY18]] 2536 ; GFX7: $vgpr14 = COPY [[COPY19]] 2537 ; GFX7: $vgpr15 = COPY [[COPY20]] 2538 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2539 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 2540 ; GFX8: bb.1 (%ir-block.0): 2541 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2542 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2543 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2544 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2545 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2546 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2547 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2548 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 2549 ; GFX8: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2550 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2551 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2552 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2553 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2554 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2555 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2556 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2557 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2558 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2559 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2560 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2561 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2562 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2563 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2564 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2565 ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub11 2566 ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2567 ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2568 ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2569 ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2570 ; GFX8: $vgpr0 = COPY [[COPY5]] 2571 ; GFX8: $vgpr1 = COPY [[COPY6]] 2572 ; GFX8: $vgpr2 = COPY [[COPY7]] 2573 ; GFX8: $vgpr3 = COPY [[COPY8]] 2574 ; GFX8: $vgpr4 = COPY [[COPY9]] 2575 ; GFX8: $vgpr5 = COPY [[COPY10]] 2576 ; GFX8: $vgpr6 = COPY [[COPY11]] 2577 ; GFX8: $vgpr7 = COPY [[COPY12]] 2578 ; GFX8: $vgpr8 = COPY [[COPY13]] 2579 ; GFX8: $vgpr9 = COPY [[COPY14]] 2580 ; GFX8: $vgpr10 = COPY [[COPY15]] 2581 ; GFX8: $vgpr11 = COPY [[COPY16]] 2582 ; GFX8: $vgpr12 = COPY [[COPY17]] 2583 ; GFX8: $vgpr13 = COPY [[COPY18]] 2584 ; GFX8: $vgpr14 = COPY [[COPY19]] 2585 ; GFX8: $vgpr15 = COPY [[COPY20]] 2586 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2587 %soffset = add i32 %soffset.base, 4036 2588 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2589 ret <16 x float> %val 2590} 2591 2592; Waterfall loop due to resource being VGPR 2593define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) { 2594 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc 2595 ; GFX6: bb.1 (%ir-block.0): 2596 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2597 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2598 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2599 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2600 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2601 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2602 ; GFX6: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2603 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 2604 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2605 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2606 ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2607 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2608 ; GFX6: bb.2: 2609 ; GFX6: successors: %bb.3, %bb.2 2610 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2611 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2612 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2613 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2614 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2615 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2616 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2617 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2618 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2619 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2620 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], 
[[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2621 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2622 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2623 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2624 ; GFX6: bb.3: 2625 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2626 ; GFX6: bb.4: 2627 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2628 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2629 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc 2630 ; GFX7: bb.1 (%ir-block.0): 2631 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2632 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2633 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2634 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2635 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2636 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2637 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2638 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 2639 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2640 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2641 ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2642 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2643 ; GFX7: bb.2: 2644 ; GFX7: successors: %bb.3, %bb.2 2645 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2646 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2647 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2648 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2649 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2650 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2651 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2652 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2653 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2654 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2655 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2656 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2657 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2658 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2659 ; GFX7: bb.3: 2660 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2661 ; GFX7: bb.4: 2662 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2663 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2664 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc 2665 ; GFX8: bb.1 (%ir-block.0): 2666 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2667 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2668 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2669 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2670 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2671 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2672 
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2673 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 2674 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2675 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2676 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2677 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2678 ; GFX8: bb.2: 2679 ; GFX8: successors: %bb.3, %bb.2 2680 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2681 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2682 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2683 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2684 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2685 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2686 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2687 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2688 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2689 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2690 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], 
[[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2691 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2692 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2693 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2694 ; GFX8: bb.3: 2695 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2696 ; GFX8: bb.4: 2697 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2698 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2699 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2700 ret float %val 2701} 2702 2703; Use the offset inside the waterfall loop 2704define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) { 2705 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 2706 ; GFX6: bb.1 (%ir-block.0): 2707 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2708 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2709 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2710 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2711 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2712 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2713 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2714 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2715 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2716 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2717 ; GFX6: bb.2: 2718 ; GFX6: successors: %bb.3, %bb.2 2719 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2720 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2721 ; GFX6: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2722 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 2723 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2724 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2725 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2726 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 2727 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2728 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2729 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2730 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2731 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2732 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2733 ; GFX6: bb.3: 2734 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2735 ; GFX6: bb.4: 2736 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2737 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2738 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 2739 ; GFX7: bb.1 (%ir-block.0): 2740 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2741 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY 
$vgpr0 2742 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2743 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2744 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2745 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2746 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2747 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2748 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2749 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2750 ; GFX7: bb.2: 2751 ; GFX7: successors: %bb.3, %bb.2 2752 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2753 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2754 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2755 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 2756 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2757 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2758 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2759 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 2760 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2761 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 
2762 ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2763 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2764 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2765 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2766 ; GFX7: bb.3: 2767 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2768 ; GFX7: bb.4: 2769 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2770 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2771 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 2772 ; GFX8: bb.1 (%ir-block.0): 2773 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2774 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2775 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2776 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2777 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2778 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2779 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2780 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2781 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2782 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2783 ; GFX8: bb.2: 2784 ; GFX8: successors: %bb.3, %bb.2 2785 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2786 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2787 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2788 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE1]], [[COPY5]], implicit $exec 2789 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2790 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2791 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2792 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 2793 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2794 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2795 ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2796 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2797 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2798 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2799 ; GFX8: bb.3: 2800 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2801 ; GFX8: bb.4: 2802 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2803 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2804 %soffset = add i32 %soffset.base, 4092 2805 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2806 ret float %val 2807} 2808 2809; Scalar offset exceeds MUBUF limit, keep add out of the loop 2810define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { 2811 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 2812 
; GFX6: bb.1 (%ir-block.0): 2813 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2814 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2815 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2816 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2817 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2818 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2819 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2820 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2821 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 2822 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 2823 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2824 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2825 ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2826 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2827 ; GFX6: bb.2: 2828 ; GFX6: successors: %bb.3, %bb.2 2829 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2830 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2831 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2832 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2833 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2834 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2835 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2836 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2837 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2838 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2839 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2840 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2841 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2842 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2843 ; GFX6: bb.3: 2844 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2845 ; GFX6: bb.4: 2846 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2847 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2848 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 2849 ; GFX7: bb.1 (%ir-block.0): 2850 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2851 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2852 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2853 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2854 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2855 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2856 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2857 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2858 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 2859 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 2860 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2861 ; GFX7: 
[[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2862 ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2863 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2864 ; GFX7: bb.2: 2865 ; GFX7: successors: %bb.3, %bb.2 2866 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2867 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2868 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2869 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2870 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2871 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2872 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2873 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2874 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2875 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2876 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2877 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2878 ; GFX7: $exec = S_XOR_B64_term 
$exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2879 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2880 ; GFX7: bb.3: 2881 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2882 ; GFX7: bb.4: 2883 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2884 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2885 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 2886 ; GFX8: bb.1 (%ir-block.0): 2887 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2888 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2889 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2890 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2891 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2892 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2893 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2894 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2895 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 2896 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 2897 ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2898 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2899 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2900 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2901 ; GFX8: bb.2: 2902 ; GFX8: successors: %bb.3, %bb.2 2903 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2904 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2905 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2906 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2907 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 
= V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2908 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2909 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2910 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2911 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2912 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2913 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2914 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2915 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2916 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2917 ; GFX8: bb.3: 2918 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2919 ; GFX8: bb.4: 2920 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2921 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2922 %soffset = add i32 %soffset.base, 4096 2923 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2924 ret float %val 2925} 2926 2927; Waterfall loop, but constant offset 2928define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) { 2929 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 2930 ; GFX6: bb.1 (%ir-block.0): 2931 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 2932 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2933 ; GFX6: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2934 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2935 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2936 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2937 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2938 ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2939 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2940 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2941 ; GFX6: bb.2: 2942 ; GFX6: successors: %bb.3, %bb.2 2943 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 2944 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 2945 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2946 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 2947 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2948 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2949 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2950 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 2951 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2952 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2953 ; GFX6: 
[[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1) 2954 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2955 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2956 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2957 ; GFX6: bb.3: 2958 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2959 ; GFX6: bb.4: 2960 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2961 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2962 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 2963 ; GFX7: bb.1 (%ir-block.0): 2964 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 2965 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2966 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2967 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2968 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2969 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2970 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2971 ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2972 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2973 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2974 ; GFX7: bb.2: 2975 ; GFX7: successors: %bb.3, %bb.2 2976 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 2977 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 2978 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2979 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE1]], [[COPY4]], implicit $exec 2980 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2981 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2982 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2983 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 2984 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2985 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2986 ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1) 2987 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2988 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2989 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2990 ; GFX7: bb.3: 2991 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2992 ; GFX7: bb.4: 2993 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2994 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2995 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 2996 ; GFX8: bb.1 (%ir-block.0): 2997 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 2998 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2999 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3000 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3001 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3002 ; GFX8: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3003 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3004 ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3005 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3006 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3007 ; GFX8: bb.2: 3008 ; GFX8: successors: %bb.3, %bb.2 3009 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 3010 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 3011 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3012 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 3013 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3014 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3015 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3016 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 3017 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3018 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3019 ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable 
invariant load 4 + 4095, align 1) 3020 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3021 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3022 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3023 ; GFX8: bb.3: 3024 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3025 ; GFX8: bb.4: 3026 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 3027 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 3028 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) 3029 ret float %val 3030} 3031 3032; Waterfall loop, but constant offset 3033define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) { 3034 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 3035 ; GFX6: bb.1 (%ir-block.0): 3036 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 3037 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3038 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3039 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3040 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3041 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3042 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3043 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 3044 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3045 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3046 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3047 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3048 ; GFX6: bb.2: 3049 ; GFX6: successors: %bb.3, %bb.2 3050 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3051 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3052 ; GFX6: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3053 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3054 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3055 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3056 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3057 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3058 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3059 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3060 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 3061 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3062 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3063 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3064 ; GFX6: bb.3: 3065 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3066 ; GFX6: bb.4: 3067 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 3068 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 3069 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 3070 ; GFX7: bb.1 (%ir-block.0): 3071 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 3072 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 
3073 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3074 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3075 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3076 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3077 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3078 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 3079 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3080 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3081 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3082 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3083 ; GFX7: bb.2: 3084 ; GFX7: successors: %bb.3, %bb.2 3085 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3086 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3087 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3088 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3089 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3090 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3091 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3092 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3093 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3094 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3095 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 3096 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3097 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3098 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3099 ; GFX7: bb.3: 3100 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3101 ; GFX7: bb.4: 3102 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 3103 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 3104 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 3105 ; GFX8: bb.1 (%ir-block.0): 3106 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 3107 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3108 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3109 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3110 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3111 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3112 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 3113 ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3114 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3115 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3116 ; GFX8: bb.2: 3117 ; GFX8: successors: %bb.3, %bb.2 3118 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 3119 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 3120 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3121 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 3122 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3123 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3124 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3125 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 3126 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3127 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3128 ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4096) 3129 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3130 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3131 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3132 ; GFX8: bb.3: 3133 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3134 ; GFX8: bb.4: 3135 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 3136 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 3137 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) 3138 ret float %val 3139} 3140 3141; Need a waterfall loop, but the offset is scalar. 3142; Make sure the base offset is added to each split load. 
3143define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) { 3144 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 3145 ; GFX6: bb.1 (%ir-block.0): 3146 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3147 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3148 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3149 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3150 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3151 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3152 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3153 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3154 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3155 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3156 ; GFX6: bb.2: 3157 ; GFX6: successors: %bb.3, %bb.2 3158 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3159 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3160 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3161 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3162 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3163 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3164 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3165 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3166 ; GFX6: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3167 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3168 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3169 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3170 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3171 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3172 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3173 ; GFX6: bb.3: 3174 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3175 ; GFX6: bb.4: 3176 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 3177 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3178 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3179 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3180 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3181 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3182 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3183 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3184 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3185 ; GFX6: $vgpr0 = COPY [[COPY7]] 3186 ; GFX6: $vgpr1 = COPY [[COPY8]] 3187 ; GFX6: $vgpr2 = COPY [[COPY9]] 3188 ; 
GFX6: $vgpr3 = COPY [[COPY10]] 3189 ; GFX6: $vgpr4 = COPY [[COPY11]] 3190 ; GFX6: $vgpr5 = COPY [[COPY12]] 3191 ; GFX6: $vgpr6 = COPY [[COPY13]] 3192 ; GFX6: $vgpr7 = COPY [[COPY14]] 3193 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3194 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 3195 ; GFX7: bb.1 (%ir-block.0): 3196 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3197 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3198 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3199 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3200 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3201 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3202 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3203 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3204 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3205 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3206 ; GFX7: bb.2: 3207 ; GFX7: successors: %bb.3, %bb.2 3208 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3209 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3210 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3211 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3212 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3213 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3214 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3215 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3216 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3217 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3218 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3219 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3220 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3221 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3222 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3223 ; GFX7: bb.3: 3224 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3225 ; GFX7: bb.4: 3226 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 3227 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3228 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3229 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3230 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3231 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3232 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3233 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE4]].sub6 3234 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3235 ; GFX7: $vgpr0 = COPY [[COPY7]] 3236 ; GFX7: $vgpr1 = COPY [[COPY8]] 3237 ; GFX7: $vgpr2 = COPY [[COPY9]] 3238 ; GFX7: $vgpr3 = COPY [[COPY10]] 3239 ; GFX7: $vgpr4 = COPY [[COPY11]] 3240 ; GFX7: $vgpr5 = COPY [[COPY12]] 3241 ; GFX7: $vgpr6 = COPY [[COPY13]] 3242 ; GFX7: $vgpr7 = COPY [[COPY14]] 3243 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3244 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 3245 ; GFX8: bb.1 (%ir-block.0): 3246 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3247 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3248 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3249 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3250 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3251 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3252 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3253 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3254 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3255 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3256 ; GFX8: bb.2: 3257 ; GFX8: successors: %bb.3, %bb.2 3258 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3259 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3260 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3261 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3262 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3263 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3264 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3265 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3266 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3267 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3268 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3269 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3270 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3271 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3272 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3273 ; GFX8: bb.3: 3274 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3275 ; GFX8: bb.4: 3276 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 3277 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3278 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3279 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3280 ; GFX8: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3281 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3282 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3283 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3284 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3285 ; GFX8: $vgpr0 = COPY [[COPY7]] 3286 ; GFX8: $vgpr1 = COPY [[COPY8]] 3287 ; GFX8: $vgpr2 = COPY [[COPY9]] 3288 ; GFX8: $vgpr3 = COPY [[COPY10]] 3289 ; GFX8: $vgpr4 = COPY [[COPY11]] 3290 ; GFX8: $vgpr5 = COPY [[COPY12]] 3291 ; GFX8: $vgpr6 = COPY [[COPY13]] 3292 ; GFX8: $vgpr7 = COPY [[COPY14]] 3293 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3294 %soffset = add i32 %soffset.base, 4064 3295 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3296 ret <8 x float> %val 3297} 3298 3299; Need a waterfall loop, but the offset is scalar. 
3300; Make sure the maximum offset isn't exeeded when splitting this 3301define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) { 3302 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 3303 ; GFX6: bb.1 (%ir-block.0): 3304 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3305 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3306 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3307 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3308 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3309 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3310 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3311 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 3312 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3313 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3314 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3315 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3316 ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3317 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3318 ; GFX6: bb.2: 3319 ; GFX6: successors: %bb.3, %bb.2 3320 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3321 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3322 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3323 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3324 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3325 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3326 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3327 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3328 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3329 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3330 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3331 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3332 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3333 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3334 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3335 ; GFX6: bb.3: 3336 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3337 ; GFX6: bb.4: 3338 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3339 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3340 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3341 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3342 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3343 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE4]].sub4 3344 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3345 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3346 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3347 ; GFX6: $vgpr0 = COPY [[COPY8]] 3348 ; GFX6: $vgpr1 = COPY [[COPY9]] 3349 ; GFX6: $vgpr2 = COPY [[COPY10]] 3350 ; GFX6: $vgpr3 = COPY [[COPY11]] 3351 ; GFX6: $vgpr4 = COPY [[COPY12]] 3352 ; GFX6: $vgpr5 = COPY [[COPY13]] 3353 ; GFX6: $vgpr6 = COPY [[COPY14]] 3354 ; GFX6: $vgpr7 = COPY [[COPY15]] 3355 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3356 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 3357 ; GFX7: bb.1 (%ir-block.0): 3358 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3359 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3360 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3361 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3362 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3363 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3364 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3365 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 3366 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3367 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3368 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3369 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3370 ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3371 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3372 ; GFX7: bb.2: 3373 ; GFX7: successors: %bb.3, %bb.2 3374 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3375 ; GFX7: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3376 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3377 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3378 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3379 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3380 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3381 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3382 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3383 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3384 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3385 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3386 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3387 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3388 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3389 ; GFX7: bb.3: 3390 ; GFX7: $exec = S_MOV_B64_term 
[[S_MOV_B64_term]] 3391 ; GFX7: bb.4: 3392 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3393 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3394 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3395 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3396 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3397 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3398 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3399 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3400 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3401 ; GFX7: $vgpr0 = COPY [[COPY8]] 3402 ; GFX7: $vgpr1 = COPY [[COPY9]] 3403 ; GFX7: $vgpr2 = COPY [[COPY10]] 3404 ; GFX7: $vgpr3 = COPY [[COPY11]] 3405 ; GFX7: $vgpr4 = COPY [[COPY12]] 3406 ; GFX7: $vgpr5 = COPY [[COPY13]] 3407 ; GFX7: $vgpr6 = COPY [[COPY14]] 3408 ; GFX7: $vgpr7 = COPY [[COPY15]] 3409 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3410 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 3411 ; GFX8: bb.1 (%ir-block.0): 3412 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3413 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3414 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3415 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3416 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3417 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3418 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3419 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 3420 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3421 ; 
GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3422 ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3423 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3424 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3425 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3426 ; GFX8: bb.2: 3427 ; GFX8: successors: %bb.3, %bb.2 3428 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3429 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3430 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3431 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3432 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3433 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3434 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3435 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3436 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3437 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3438 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3439 ; GFX8: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3440 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3441 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3442 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3443 ; GFX8: bb.3: 3444 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3445 ; GFX8: bb.4: 3446 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3447 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3448 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3449 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3450 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3451 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3452 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3453 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3454 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3455 ; GFX8: $vgpr0 = COPY [[COPY8]] 3456 ; GFX8: $vgpr1 = COPY [[COPY9]] 3457 ; GFX8: $vgpr2 = COPY [[COPY10]] 3458 ; GFX8: $vgpr3 = COPY [[COPY11]] 3459 ; GFX8: $vgpr4 = COPY [[COPY12]] 3460 ; GFX8: $vgpr5 = COPY [[COPY13]] 3461 ; GFX8: $vgpr6 = COPY [[COPY14]] 3462 ; GFX8: $vgpr7 = COPY [[COPY15]] 3463 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3464 %soffset = add i32 %soffset.base, 4068 3465 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3466 ret <8 x float> %val 3467} 3468 3469define 
amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { 3470 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 3471 ; GFX6: bb.1 (%ir-block.0): 3472 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3473 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3474 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3475 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3476 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3477 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3478 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3479 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3480 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3481 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3482 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3483 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3484 ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3485 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3486 ; GFX6: bb.2: 3487 ; GFX6: successors: %bb.3, %bb.2 3488 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3489 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3490 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3491 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3492 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3493 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3494 ; GFX6: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3495 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3496 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3497 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3498 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3499 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3500 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3501 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3502 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3503 ; GFX6: bb.3: 3504 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3505 ; GFX6: bb.4: 3506 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3507 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3508 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3509 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3510 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3511 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3512 ; GFX6: 
[[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3513 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3514 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3515 ; GFX6: $vgpr0 = COPY [[COPY8]] 3516 ; GFX6: $vgpr1 = COPY [[COPY9]] 3517 ; GFX6: $vgpr2 = COPY [[COPY10]] 3518 ; GFX6: $vgpr3 = COPY [[COPY11]] 3519 ; GFX6: $vgpr4 = COPY [[COPY12]] 3520 ; GFX6: $vgpr5 = COPY [[COPY13]] 3521 ; GFX6: $vgpr6 = COPY [[COPY14]] 3522 ; GFX6: $vgpr7 = COPY [[COPY15]] 3523 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3524 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 3525 ; GFX7: bb.1 (%ir-block.0): 3526 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3527 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3528 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3529 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3530 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3531 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3532 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3533 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3534 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3535 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3536 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3537 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3538 ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3539 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3540 ; GFX7: bb.2: 3541 ; GFX7: successors: %bb.3, %bb.2 3542 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3543 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3544 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3545 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3546 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3547 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3548 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3549 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3550 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3551 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3552 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3553 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3554 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3555 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3556 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3557 ; GFX7: bb.3: 3558 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3559 ; GFX7: bb.4: 3560 ; 
GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3561 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3562 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3563 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3564 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3565 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3566 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3567 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3568 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3569 ; GFX7: $vgpr0 = COPY [[COPY8]] 3570 ; GFX7: $vgpr1 = COPY [[COPY9]] 3571 ; GFX7: $vgpr2 = COPY [[COPY10]] 3572 ; GFX7: $vgpr3 = COPY [[COPY11]] 3573 ; GFX7: $vgpr4 = COPY [[COPY12]] 3574 ; GFX7: $vgpr5 = COPY [[COPY13]] 3575 ; GFX7: $vgpr6 = COPY [[COPY14]] 3576 ; GFX7: $vgpr7 = COPY [[COPY15]] 3577 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3578 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 3579 ; GFX8: bb.1 (%ir-block.0): 3580 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3581 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3582 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3583 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3584 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3585 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3586 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3587 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3588 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3589 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY 
[[S_ADD_I32_]] 3590 ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3591 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3592 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3593 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3594 ; GFX8: bb.2: 3595 ; GFX8: successors: %bb.3, %bb.2 3596 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3597 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3598 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3599 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3600 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3601 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3602 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3603 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3604 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3605 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3606 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3607 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN 
[[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3608 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3609 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3610 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3611 ; GFX8: bb.3: 3612 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3613 ; GFX8: bb.4: 3614 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3615 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3616 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3617 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3618 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3619 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3620 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3621 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3622 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3623 ; GFX8: $vgpr0 = COPY [[COPY8]] 3624 ; GFX8: $vgpr1 = COPY [[COPY9]] 3625 ; GFX8: $vgpr2 = COPY [[COPY10]] 3626 ; GFX8: $vgpr3 = COPY [[COPY11]] 3627 ; GFX8: $vgpr4 = COPY [[COPY12]] 3628 ; GFX8: $vgpr5 = COPY [[COPY13]] 3629 ; GFX8: $vgpr6 = COPY [[COPY14]] 3630 ; GFX8: $vgpr7 = COPY [[COPY15]] 3631 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3632 %soffset = add i32 %soffset.base, 4096 3633 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3634 ret <8 x float> %val 3635} 3636 3637define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x 
i32> %rsrc, i32 %offset.base) { 3638 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 3639 ; GFX6: bb.1 (%ir-block.0): 3640 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3641 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3642 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3643 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3644 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3645 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3646 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3647 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 3648 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3649 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3650 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3651 ; GFX6: bb.2: 3652 ; GFX6: successors: %bb.3, %bb.2 3653 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3654 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3655 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3656 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3657 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3658 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3659 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3660 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3661 ; GFX6: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3662 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3663 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3664 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3665 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3666 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3667 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3668 ; GFX6: bb.3: 3669 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3670 ; GFX6: bb.4: 3671 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3672 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3673 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3674 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3675 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3676 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3677 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3678 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3679 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3680 ; GFX6: $vgpr0 = COPY [[COPY7]] 3681 ; GFX6: $vgpr1 = COPY [[COPY8]] 3682 ; GFX6: $vgpr2 = COPY 
[[COPY9]] 3683 ; GFX6: $vgpr3 = COPY [[COPY10]] 3684 ; GFX6: $vgpr4 = COPY [[COPY11]] 3685 ; GFX6: $vgpr5 = COPY [[COPY12]] 3686 ; GFX6: $vgpr6 = COPY [[COPY13]] 3687 ; GFX6: $vgpr7 = COPY [[COPY14]] 3688 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3689 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 3690 ; GFX7: bb.1 (%ir-block.0): 3691 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3692 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3693 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3694 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3695 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3696 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3697 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3698 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 3699 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3700 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3701 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3702 ; GFX7: bb.2: 3703 ; GFX7: successors: %bb.3, %bb.2 3704 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3705 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3706 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3707 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3708 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3709 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY6]].sub1, implicit $exec 3710 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3711 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3712 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3713 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3714 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3715 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3716 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3717 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3718 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3719 ; GFX7: bb.3: 3720 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3721 ; GFX7: bb.4: 3722 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3723 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3724 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3725 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3726 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3727 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub4 3728 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3729 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3730 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3731 ; GFX7: $vgpr0 = COPY [[COPY7]] 3732 ; GFX7: $vgpr1 = COPY [[COPY8]] 3733 ; GFX7: $vgpr2 = COPY [[COPY9]] 3734 ; GFX7: $vgpr3 = COPY [[COPY10]] 3735 ; GFX7: $vgpr4 = COPY [[COPY11]] 3736 ; GFX7: $vgpr5 = COPY [[COPY12]] 3737 ; GFX7: $vgpr6 = COPY [[COPY13]] 3738 ; GFX7: $vgpr7 = COPY [[COPY14]] 3739 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3740 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 3741 ; GFX8: bb.1 (%ir-block.0): 3742 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3743 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3744 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3745 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3746 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3747 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3748 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3749 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 3750 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3751 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3752 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3753 ; GFX8: bb.2: 3754 ; GFX8: successors: %bb.3, %bb.2 3755 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3756 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3757 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1 3758 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3759 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3760 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3761 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3762 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3763 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3764 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3765 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3766 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3767 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3768 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3769 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3770 ; GFX8: bb.3: 3771 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3772 ; GFX8: bb.4: 3773 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3774 ; GFX8: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3775 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3776 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3777 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3778 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3779 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3780 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3781 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3782 ; GFX8: $vgpr0 = COPY [[COPY7]] 3783 ; GFX8: $vgpr1 = COPY [[COPY8]] 3784 ; GFX8: $vgpr2 = COPY [[COPY9]] 3785 ; GFX8: $vgpr3 = COPY [[COPY10]] 3786 ; GFX8: $vgpr4 = COPY [[COPY11]] 3787 ; GFX8: $vgpr5 = COPY [[COPY12]] 3788 ; GFX8: $vgpr6 = COPY [[COPY13]] 3789 ; GFX8: $vgpr7 = COPY [[COPY14]] 3790 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3791 %soffset = add i32 %offset.base, 5000 3792 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3793 ret <8 x float> %val 3794} 3795 3796define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) { 3797 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 3798 ; GFX6: bb.1 (%ir-block.0): 3799 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3800 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3801 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3802 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3803 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3804 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3805 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3806 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 3807 ; GFX6: 
[[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3808 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3809 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3810 ; GFX6: bb.2: 3811 ; GFX6: successors: %bb.3, %bb.2 3812 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3813 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3814 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3815 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3816 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3817 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3818 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3819 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3820 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3821 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3822 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3823 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load 16, align 4) 3824 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3825 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3826 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3827 ; GFX6: bb.3: 3828 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3829 ; GFX6: bb.4: 3830 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3831 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3832 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3833 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3834 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3835 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3836 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3837 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3838 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3839 ; GFX6: $vgpr0 = COPY [[COPY7]] 3840 ; GFX6: $vgpr1 = COPY [[COPY8]] 3841 ; GFX6: $vgpr2 = COPY [[COPY9]] 3842 ; GFX6: $vgpr3 = COPY [[COPY10]] 3843 ; GFX6: $vgpr4 = COPY [[COPY11]] 3844 ; GFX6: $vgpr5 = COPY [[COPY12]] 3845 ; GFX6: $vgpr6 = COPY [[COPY13]] 3846 ; GFX6: $vgpr7 = COPY [[COPY14]] 3847 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3848 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 3849 ; GFX7: bb.1 (%ir-block.0): 3850 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3851 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3852 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3853 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3854 ; GFX7: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3855 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3856 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3857 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 3858 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3859 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3860 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3861 ; GFX7: bb.2: 3862 ; GFX7: successors: %bb.3, %bb.2 3863 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3864 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3865 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3866 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3867 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3868 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3869 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3870 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3871 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3872 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3873 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3874 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3875 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3876 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3877 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3878 ; GFX7: bb.3: 3879 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3880 ; GFX7: bb.4: 3881 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3882 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3883 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3884 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3885 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3886 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3887 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3888 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3889 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3890 ; GFX7: $vgpr0 = COPY [[COPY7]] 3891 ; GFX7: $vgpr1 = COPY [[COPY8]] 3892 ; GFX7: $vgpr2 = COPY [[COPY9]] 3893 ; GFX7: $vgpr3 = COPY [[COPY10]] 3894 ; GFX7: $vgpr4 = COPY [[COPY11]] 3895 ; GFX7: $vgpr5 = COPY [[COPY12]] 3896 ; GFX7: $vgpr6 = COPY [[COPY13]] 3897 ; GFX7: $vgpr7 = COPY [[COPY14]] 3898 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3899 ; GFX8-LABEL: name: 
s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 3900 ; GFX8: bb.1 (%ir-block.0): 3901 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3902 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3903 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3904 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3905 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3906 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3907 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3908 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 3909 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3910 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3911 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3912 ; GFX8: bb.2: 3913 ; GFX8: successors: %bb.3, %bb.2 3914 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3915 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3916 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3917 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3918 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3919 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3920 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3921 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3922 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc 3923 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3924 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3925 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3926 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3927 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3928 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3929 ; GFX8: bb.3: 3930 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3931 ; GFX8: bb.4: 3932 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3933 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3934 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3935 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3936 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3937 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3938 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3939 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3940 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3941 ; GFX8: $vgpr0 = COPY [[COPY7]] 3942 ; GFX8: $vgpr1 = COPY [[COPY8]] 3943 ; GFX8: $vgpr2 = COPY [[COPY9]] 3944 ; GFX8: $vgpr3 = COPY [[COPY10]] 3945 ; GFX8: $vgpr4 
= COPY [[COPY11]] 3946 ; GFX8: $vgpr5 = COPY [[COPY12]] 3947 ; GFX8: $vgpr6 = COPY [[COPY13]] 3948 ; GFX8: $vgpr7 = COPY [[COPY14]] 3949 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3950 %soffset = add i32 %offset.base, 4076 3951 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3952 ret <8 x float> %val 3953} 3954 3955define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) { 3956 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 3957 ; GFX6: bb.1 (%ir-block.0): 3958 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3959 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3960 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3961 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3962 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3963 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3964 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3965 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 3966 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3967 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3968 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3969 ; GFX6: bb.2: 3970 ; GFX6: successors: %bb.3, %bb.2 3971 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3972 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3973 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3974 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3975 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3976 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3977 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3978 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3979 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3980 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3981 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3982 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3983 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3984 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3985 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3986 ; GFX6: bb.3: 3987 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3988 ; GFX6: bb.4: 3989 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3990 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3991 ; GFX6: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3992 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3993 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3994 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3995 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3996 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3997 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3998 ; GFX6: $vgpr0 = COPY [[COPY7]] 3999 ; GFX6: $vgpr1 = COPY [[COPY8]] 4000 ; GFX6: $vgpr2 = COPY [[COPY9]] 4001 ; GFX6: $vgpr3 = COPY [[COPY10]] 4002 ; GFX6: $vgpr4 = COPY [[COPY11]] 4003 ; GFX6: $vgpr5 = COPY [[COPY12]] 4004 ; GFX6: $vgpr6 = COPY [[COPY13]] 4005 ; GFX6: $vgpr7 = COPY [[COPY14]] 4006 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4007 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 4008 ; GFX7: bb.1 (%ir-block.0): 4009 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 4010 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4011 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 4012 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 4013 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 4014 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 4015 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4016 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 4017 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 4018 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 4019 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 4020 ; GFX7: bb.2: 4021 ; GFX7: successors: %bb.3, %bb.2 4022 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4023 ; 
GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 4024 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4025 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 4026 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 4027 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 4028 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4029 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 4030 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4031 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4032 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 4033 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 4034 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4035 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4036 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4037 ; GFX7: bb.3: 4038 ; GFX7: $exec = 
S_MOV_B64_term [[S_MOV_B64_term]] 4039 ; GFX7: bb.4: 4040 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 4041 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4042 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4043 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4044 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4045 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4046 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4047 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4048 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4049 ; GFX7: $vgpr0 = COPY [[COPY7]] 4050 ; GFX7: $vgpr1 = COPY [[COPY8]] 4051 ; GFX7: $vgpr2 = COPY [[COPY9]] 4052 ; GFX7: $vgpr3 = COPY [[COPY10]] 4053 ; GFX7: $vgpr4 = COPY [[COPY11]] 4054 ; GFX7: $vgpr5 = COPY [[COPY12]] 4055 ; GFX7: $vgpr6 = COPY [[COPY13]] 4056 ; GFX7: $vgpr7 = COPY [[COPY14]] 4057 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4058 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 4059 ; GFX8: bb.1 (%ir-block.0): 4060 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 4061 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4062 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 4063 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 4064 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 4065 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 4066 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4067 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 4068 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 4069 ; 
GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 4070 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 4071 ; GFX8: bb.2: 4072 ; GFX8: successors: %bb.3, %bb.2 4073 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4074 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 4075 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4076 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 4077 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 4078 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 4079 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4080 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 4081 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4082 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4083 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 4084 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 4085 ; GFX8: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4086 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4087 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4088 ; GFX8: bb.3: 4089 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 4090 ; GFX8: bb.4: 4091 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 4092 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4093 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4094 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4095 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4096 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4097 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4098 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4099 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4100 ; GFX8: $vgpr0 = COPY [[COPY7]] 4101 ; GFX8: $vgpr1 = COPY [[COPY8]] 4102 ; GFX8: $vgpr2 = COPY [[COPY9]] 4103 ; GFX8: $vgpr3 = COPY [[COPY10]] 4104 ; GFX8: $vgpr4 = COPY [[COPY11]] 4105 ; GFX8: $vgpr5 = COPY [[COPY12]] 4106 ; GFX8: $vgpr6 = COPY [[COPY13]] 4107 ; GFX8: $vgpr7 = COPY [[COPY14]] 4108 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4109 %soffset = add i32 %offset.base, 4080 4110 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 4111 ret <8 x float> %val 4112} 4113 4114define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) { 4115 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 4116 ; GFX6: bb.1 
(%ir-block.0): 4117 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 4118 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4119 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 4120 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 4121 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 4122 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4123 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4124 ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 4125 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 4126 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 4127 ; GFX6: bb.2: 4128 ; GFX6: successors: %bb.3, %bb.2 4129 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 4130 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 4131 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4132 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 4133 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4134 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 4135 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4136 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 4137 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4138 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4139 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4140 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4141 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4142 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4143 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4144 ; GFX6: bb.3: 4145 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 4146 ; GFX6: bb.4: 4147 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 4148 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4149 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4150 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4151 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4152 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4153 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4154 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4155 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4156 ; GFX6: $vgpr0 = COPY [[COPY6]] 4157 ; GFX6: $vgpr1 = COPY [[COPY7]] 4158 ; GFX6: $vgpr2 = COPY [[COPY8]] 4159 ; GFX6: $vgpr3 = COPY [[COPY9]] 4160 ; GFX6: $vgpr4 = COPY [[COPY10]] 4161 ; GFX6: $vgpr5 = COPY [[COPY11]] 4162 ; GFX6: $vgpr6 = COPY [[COPY12]] 4163 ; GFX6: $vgpr7 = COPY [[COPY13]] 4164 ; GFX6: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4165 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 4166 ; GFX7: bb.1 (%ir-block.0): 4167 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 4168 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4169 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 4170 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 4171 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 4172 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4173 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4174 ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 4175 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 4176 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 4177 ; GFX7: bb.2: 4178 ; GFX7: successors: %bb.3, %bb.2 4179 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 4180 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 4181 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4182 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 4183 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4184 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 4185 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4186 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], 
[[COPY5]], implicit $exec 4187 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4188 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4189 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4190 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4191 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4192 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4193 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4194 ; GFX7: bb.3: 4195 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 4196 ; GFX7: bb.4: 4197 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 4198 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4199 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4200 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4201 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4202 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4203 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4204 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4205 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4206 ; GFX7: $vgpr0 = COPY [[COPY6]] 4207 ; GFX7: $vgpr1 = 
COPY [[COPY7]] 4208 ; GFX7: $vgpr2 = COPY [[COPY8]] 4209 ; GFX7: $vgpr3 = COPY [[COPY9]] 4210 ; GFX7: $vgpr4 = COPY [[COPY10]] 4211 ; GFX7: $vgpr5 = COPY [[COPY11]] 4212 ; GFX7: $vgpr6 = COPY [[COPY12]] 4213 ; GFX7: $vgpr7 = COPY [[COPY13]] 4214 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4215 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 4216 ; GFX8: bb.1 (%ir-block.0): 4217 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 4218 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4219 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 4220 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 4221 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 4222 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4223 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4224 ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 4225 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 4226 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 4227 ; GFX8: bb.2: 4228 ; GFX8: successors: %bb.3, %bb.2 4229 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 4230 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 4231 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4232 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 4233 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4234 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 
4235 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4236 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 4237 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4238 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4239 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4240 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4241 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4242 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4243 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4244 ; GFX8: bb.3: 4245 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 4246 ; GFX8: bb.4: 4247 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 4248 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4249 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4250 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4251 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4252 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4253 ; GFX8: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4254 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4255 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4256 ; GFX8: $vgpr0 = COPY [[COPY6]] 4257 ; GFX8: $vgpr1 = COPY [[COPY7]] 4258 ; GFX8: $vgpr2 = COPY [[COPY8]] 4259 ; GFX8: $vgpr3 = COPY [[COPY9]] 4260 ; GFX8: $vgpr4 = COPY [[COPY10]] 4261 ; GFX8: $vgpr5 = COPY [[COPY11]] 4262 ; GFX8: $vgpr6 = COPY [[COPY12]] 4263 ; GFX8: $vgpr7 = COPY [[COPY13]] 4264 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4265 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) 4266 ret <8 x float> %val 4267} 4268 4269define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4270 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr 4271 ; GFX6: bb.1 (%ir-block.0): 4272 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4273 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4274 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4275 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4276 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4277 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4278 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4279 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4280 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4281 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4282 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4283 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr 4284 ; GFX7: bb.1 (%ir-block.0): 4285 ; GFX7: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4286 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4287 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4288 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4289 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4290 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4291 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4292 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4293 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4294 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4295 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4296 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr 4297 ; GFX8: bb.1 (%ir-block.0): 4298 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4299 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4300 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4301 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4302 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4303 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4304 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4305 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4306 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4307 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4308 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4309 %offset = add i32 %offset.v, %offset.s 4310 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4311 ret float %val 4312} 4313 4314define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> 
inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4315 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr 4316 ; GFX6: bb.1 (%ir-block.0): 4317 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4318 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4319 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4320 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4321 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4322 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4323 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4324 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4325 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4326 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4327 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4328 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr 4329 ; GFX7: bb.1 (%ir-block.0): 4330 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4331 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4332 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4333 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4334 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4335 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4336 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4337 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4338 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4339 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4340 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4341 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr 
4342 ; GFX8: bb.1 (%ir-block.0): 4343 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4344 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4345 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4346 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4347 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4348 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4349 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4350 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4351 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4352 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4353 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4354 %offset = add i32 %offset.s, %offset.v 4355 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4356 ret float %val 4357} 4358 4359define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4360 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm 4361 ; GFX6: bb.1 (%ir-block.0): 4362 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4363 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4364 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4365 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4366 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4367 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4368 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4369 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4370 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4371 ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4372 ; GFX6: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4373 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4374 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4375 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4376 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm 4377 ; GFX7: bb.1 (%ir-block.0): 4378 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4379 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4380 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4381 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4382 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4383 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4384 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4385 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4386 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4387 ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4388 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4389 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4390 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4391 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4392 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm 4393 ; GFX8: bb.1 (%ir-block.0): 4394 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4395 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4396 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4397 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4398 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4399 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4400 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 
4401 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4402 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4403 ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4404 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4405 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4406 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4407 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4408 %offset.base = add i32 %offset.v, %offset.s 4409 %offset = add i32 %offset.base, 1024 4410 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4411 ret float %val 4412} 4413 4414define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4415 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm 4416 ; GFX6: bb.1 (%ir-block.0): 4417 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4418 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4419 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4420 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4421 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4422 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4423 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4424 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4425 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4426 ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec 4427 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4428 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], 
[[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4429 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4430 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4431 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm 4432 ; GFX7: bb.1 (%ir-block.0): 4433 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4434 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4435 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4436 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4437 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4438 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4439 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4440 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4441 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4442 ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec 4443 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4444 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4445 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4446 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4447 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm 4448 ; GFX8: bb.1 (%ir-block.0): 4449 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4450 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4451 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4452 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4453 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4454 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4455 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4456 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 4457 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4458 ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec 4459 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4460 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4461 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4462 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4463 %offset.base = add i32 %offset.s, %offset.v 4464 %offset = add i32 %offset.base, 1024 4465 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4466 ret float %val 4467} 4468 4469; TODO: Ideally this would be reassociated to fold. 4470define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4471 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr 4472 ; GFX6: bb.1 (%ir-block.0): 4473 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4474 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4475 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4476 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4477 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4478 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4479 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4480 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4481 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4482 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc 4483 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4484 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 
4485 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4486 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr 4487 ; GFX7: bb.1 (%ir-block.0): 4488 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4489 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4490 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4491 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4492 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4493 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4494 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4495 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4496 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4497 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc 4498 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4499 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4500 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4501 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr 4502 ; GFX8: bb.1 (%ir-block.0): 4503 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4504 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4505 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4506 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4507 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4508 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4509 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4510 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4511 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4512 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc 4513 ; GFX8: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4514 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4515 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4516 %offset.base = add i32 %offset.s, 1024 4517 %offset = add i32 %offset.base, %offset.v 4518 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4519 ret float %val 4520} 4521 4522define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4523 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr 4524 ; GFX6: bb.1 (%ir-block.0): 4525 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4526 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4527 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4528 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4529 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4530 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4531 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4532 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4533 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4534 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 4535 ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4536 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4537 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4538 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4539 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr 4540 ; GFX7: bb.1 (%ir-block.0): 4541 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4542 ; GFX7: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4543 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4544 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4545 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4546 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4547 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4548 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4549 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4550 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 4551 ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4552 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4553 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4554 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4555 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr 4556 ; GFX8: bb.1 (%ir-block.0): 4557 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4558 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4559 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4560 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4561 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4562 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4563 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4564 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4565 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4566 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 4567 ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4568 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load 4) 4569 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4570 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4571 %offset.base = add i32 %offset.v, 1024 4572 %offset = add i32 %offset.base, %offset.s 4573 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4574 ret float %val 4575} 4576 4577declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) 4578declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) 4579declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg) 4580declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) 4581declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg) 4582declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg) 4583 4584declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) 4585declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg) 4586declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg) 4587declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg) 4588declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg) 4589declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg) 4590 4591declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg) 4592declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg) 4593declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg) 4594 4595declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg) 4596declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg) 4597 4598declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg) 4599declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg) 4600 4601declare <4 x i8 addrspace(1)*> 
@llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg) 4602declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg) 4603