; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GCN %s

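; Tests legalization of llvm.amdgcn.image.load.2d for every result width
; (f32 through v4f32), with and without TFE, and with partial or zero dmask
; values.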
define amdgpu_ps float @image_load_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_v2f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN:   $vgpr3 = COPY [[UV3]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

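; The TFE variants return an extra status dword in the result struct; the
; checks below expect the load result to be widened by one element and the
; status value to be stored to the undef global pointer.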
define amdgpu_ps float @image_load_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { float, i32 } %res, 0
  %tfe = extractvalue { float, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_tfe_v2f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GCN:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN:   $vgpr3 = COPY [[UV3]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

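; The following tests use partial or zero dmask values. Components that are
; not loaded are expected to come from G_IMPLICIT_DEF, and a dmask of 0
; elides the load entirely.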
define amdgpu_ps float @image_load_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   $vgpr0 = COPY [[DEF]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_v2f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN:   $vgpr1 = COPY [[DEF]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

define amdgpu_ps <2 x float> @image_load_v2f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN:   $vgpr1 = COPY [[DEF]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1110
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN:   $vgpr3 = COPY [[DEF]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF]](s32)
  ; GCN:   $vgpr3 = COPY [[DEF]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN:   $vgpr1 = COPY [[DEF]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF]](s32)
  ; GCN:   $vgpr3 = COPY [[DEF]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN:   $vgpr3 = COPY [[UV3]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

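; With TFE the load cannot be skipped even for a zero dmask; these checks
; expect at least one data dword plus the status dword to be loaded, with any
; remaining result components taken from G_IMPLICIT_DEF.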
define amdgpu_ps float @image_load_tfe_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { float, i32 } %res, 0
  %tfe = extractvalue { float, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[DEF1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <2 x float> %tex
}

define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[DEF1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[DEF1]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[DEF1]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1110
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN:   $vgpr3 = COPY [[DEF1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF1]](s32)
  ; GCN:   $vgpr3 = COPY [[DEF1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[DEF1]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF1]](s32)
  ; GCN:   $vgpr3 = COPY [[DEF1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GCN:   $vgpr0 = COPY [[UV]](s32)
  ; GCN:   $vgpr1 = COPY [[DEF1]](s32)
  ; GCN:   $vgpr2 = COPY [[DEF1]](s32)
  ; GCN:   $vgpr3 = COPY [[DEF1]](s32)
  ; GCN:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %tex
}

declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }