; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GCN %s

define amdgpu_ps float @image_load_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_v2f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[UV2]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[UV2]](s32)
  ; GCN: $vgpr3 = COPY [[UV3]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps float @image_load_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { float, i32 } %res, 0
  %tfe = extractvalue { float, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_tfe_v2f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[UV2]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GCN: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[UV2]](s32)
  ; GCN: $vgpr3 = COPY [[UV3]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

define amdgpu_ps float @image_load_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: $vgpr0 = COPY [[DEF]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_v2f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN: $vgpr1 = COPY [[DEF]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

define amdgpu_ps <2 x float> @image_load_v2f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[DEF]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN: $vgpr1 = COPY [[DEF]](s32)
  ; GCN: $vgpr2 = COPY [[DEF]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[UV2]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1110
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[UV2]](s32)
  ; GCN: $vgpr3 = COPY [[DEF]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[DEF]](s32)
  ; GCN: $vgpr3 = COPY [[DEF]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN: $vgpr1 = COPY [[DEF]](s32)
  ; GCN: $vgpr2 = COPY [[DEF]](s32)
  ; GCN: $vgpr3 = COPY [[DEF]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[UV2]](s32)
  ; GCN: $vgpr3 = COPY [[UV3]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps float @image_load_tfe_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { float, i32 } %res, 0
  %tfe = extractvalue { float, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <2 x float> %tex
}

define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1110
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[UV2]](s32)
  ; GCN: $vgpr3 = COPY [[DEF1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[UV1]](s32)
  ; GCN: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN: $vgpr3 = COPY [[DEF1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN: $vgpr3 = COPY [[DEF1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN: $vgpr0 = COPY [[UV]](s32)
  ; GCN: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN: $vgpr3 = COPY [[DEF1]](s32)
  ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }